diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 9afba07021..105efd30f6 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -9,6 +9,70 @@ use super::*; #[cfg(test)] use stdarch_test::assert_instr; +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_xor(simd_xor(a, b), c) +} + +/// Three-way exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + simd_xor(simd_xor(a, b), c) +} + /// Absolute difference between the arguments of Floating #[inline] #[target_feature(enable = "neon")] @@ -35,6 +99,22 @@ pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vabdq_f64_(a, b) } +/// Floating-point absolute difference +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fabd))] +pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 { + simd_extract(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point absolute difference +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fabd))] +pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 { + simd_extract(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + /// Unsigned Absolute difference Long #[inline] #[target_feature(enable = "neon")] @@ -162,6 +242,38 @@ pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { simd_eq(a, b) } +/// Compare bitwise equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { + transmute(vceq_s64(transmute(a), transmute(b))) +} + +/// Compare bitwise equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { + transmute(vceq_u64(transmute(a), transmute(b))) +} + +/// Floating-point compare equal +#[inline] 
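The `veor3q_*` functions above are a fused three-way XOR, `a ^ b ^ c`, matching the SHA3 `EOR3` operation. A minimal usage sketch with illustrative values, assuming an aarch64 target where these `core::arch::aarch64` intrinsics are available:

    unsafe {
        // 0b1100 ^ 0b1010 ^ 0b1001 == 0b1111 in every lane.
        let r = veor3q_u8(vdupq_n_u8(0b1100), vdupq_n_u8(0b1010), vdupq_n_u8(0b1001));
        assert_eq!(vgetq_lane_u8::<0>(r), 0b1111);
    }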
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 { + simd_extract(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 { + simd_extract(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + /// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] @@ -378,6 +490,38 @@ pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t { simd_eq(a, transmute(b)) } +/// Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqzd_s64(a: i64) -> u64 { + transmute(vceqz_s64(transmute(a))) +} + +/// Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqzd_u64(a: u64) -> u64 { + transmute(vceqz_u64(transmute(a))) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqzs_f32(a: f32) -> u32 { + simd_extract(vceqz_f32(vdup_n_f32(a)), 0) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqzd_f64(a: f64) -> u64 { + simd_extract(vceqz_f64(vdup_n_f64(a)), 0) +} + /// Signed compare bitwise Test bits nonzero #[inline] #[target_feature(enable = "neon")] @@ -438,6 +582,168 @@ pub unsafe fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_ne(c, transmute(d)) } +/// Compare bitwise test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 { + transmute(vtst_s64(transmute(a), transmute(b))) +} + +/// Compare bitwise test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] +pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 { + transmute(vtst_u64(transmute(a), transmute(b))) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v8i8")] + fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t; + } + vuqadd_s8_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v4i16")] + fn vuqadd_s16_(a: int16x4_t, b: uint16x4_t) -> int16x4_t; + } + vuqadd_s16_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v2i32")] + fn vuqadd_s32_(a: int32x2_t, b: uint32x2_t) -> int32x2_t; + } + vuqadd_s32_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] 
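These scalar compares (`vceqd_*`, `vceqzd_*`, `vtstd_*`) return a mask rather than a boolean: all ones when the predicate holds, zero otherwise, mirroring the vector forms lane-for-lane. A short sketch of the expected values, assuming an aarch64 target:

    unsafe {
        assert_eq!(vceqd_s64(7, 7), u64::MAX);           // equal     -> all ones
        assert_eq!(vceqd_s64(7, 8), 0);                  // not equal -> all zeros
        assert_eq!(vceqzd_u64(0), u64::MAX);             // zero      -> all ones
        assert_eq!(vtstd_u64(0b1100, 0b0100), u64::MAX); // shared set bits
    }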
+#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v1i64")] + fn vuqadd_s64_(a: int64x1_t, b: uint64x1_t) -> int64x1_t; + } + vuqadd_s64_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v16i8")] + fn vuqaddq_s8_(a: int8x16_t, b: uint8x16_t) -> int8x16_t; + } + vuqaddq_s8_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v8i16")] + fn vuqaddq_s16_(a: int16x8_t, b: uint16x8_t) -> int16x8_t; + } + vuqaddq_s16_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v4i32")] + fn vuqaddq_s32_(a: int32x4_t, b: uint32x4_t) -> int32x4_t; + } + vuqaddq_s32_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.v2i64")] + fn vuqaddq_s64_(a: int64x2_t, b: uint64x2_t) -> int64x2_t; + } + vuqaddq_s64_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqadds_s32(a: i32, b: u32) -> i32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.i32")] + fn vuqadds_s32_(a: i32, b: u32) -> i32; + } + vuqadds_s32_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddd_s64(a: i64, b: u64) -> i64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.i64")] + fn vuqaddd_s64_(a: i64, b: u64) -> i64; + } + vuqaddd_s64_(a, b) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 { + simd_extract(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) +} + +/// Signed saturating accumulate of unsigned value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(suqadd))] +pub unsafe fn vuqaddh_s16(a: i16, b: u16) -> i16 { + simd_extract(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) +} + /// Floating-point absolute value #[inline] #[target_feature(enable = "neon")] @@ -502,6 +808,38 @@ pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> 
uint64x2_t { simd_gt(a, b) } +/// Compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { + transmute(vcgt_s64(transmute(a), transmute(b))) +} + +/// Compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { + transmute(vcgt_u64(transmute(a), transmute(b))) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { + simd_extract(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 { + simd_extract(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + /// Compare signed less than #[inline] #[target_feature(enable = "neon")] @@ -550,6 +888,38 @@ pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { simd_lt(a, b) } +/// Compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { + transmute(vclt_s64(transmute(a), transmute(b))) +} + +/// Compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { + transmute(vclt_u64(transmute(a), transmute(b))) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { + simd_extract(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 { + simd_extract(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + /// Compare signed less than or equal #[inline] #[target_feature(enable = "neon")] @@ -566,6 +936,38 @@ pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { simd_le(a, b) } +/// Compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { + transmute(vcge_s64(transmute(a), transmute(b))) +} + +/// Compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { + transmute(vcge_u64(transmute(a), transmute(b))) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { + simd_extract(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 { + simd_extract(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + /// Compare unsigned less than or equal #[inline] #[target_feature(enable = "neon")] @@ -598,23 +1000,55 @@ pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { simd_le(a, b) } -/// Compare signed greater than or equal +/// Compare less than or equal #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] -pub unsafe fn vcge_s64(a: 
int64x1_t, b: int64x1_t) -> uint64x1_t { - simd_ge(a, b) +pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { + transmute(vcle_s64(transmute(a), transmute(b))) } -/// Compare signed greater than or equal +/// Compare less than or equal #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] -pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { - simd_ge(a, b) +pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { + transmute(vcle_u64(transmute(a), transmute(b))) } -/// Compare unsigned greater than or equal +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { + simd_extract(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 { + simd_extract(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmhs))] @@ -754,6 +1188,30 @@ pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t { simd_ge(a, transmute(b)) } +/// Compare signed greater than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcgezd_s64(a: i64) -> u64 { + transmute(vcgez_s64(transmute(a))) +} + +/// Floating-point compare greater than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcgezs_f32(a: f32) -> u32 { + simd_extract(vcgez_f32(vdup_n_f32(a)), 0) +} + +/// Floating-point compare greater than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcgezd_f64(a: f64) -> u64 { + simd_extract(vcgez_f64(vdup_n_f64(a)), 0) +} + /// Compare signed greater than zero #[inline] #[target_feature(enable = "neon")] @@ -862,6 +1320,30 @@ pub unsafe fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t { simd_gt(a, transmute(b)) } +/// Compare signed greater than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcgtzd_s64(a: i64) -> u64 { + transmute(vcgtz_s64(transmute(a))) +} + +/// Floating-point compare greater than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgtzs_f32(a: f32) -> u32 { + simd_extract(vcgtz_f32(vdup_n_f32(a)), 0) +} + +/// Floating-point compare greater than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgtzd_f64(a: f64) -> u64 { + simd_extract(vcgtz_f64(vdup_n_f64(a)), 0) +} + /// Compare signed less than or equal to zero #[inline] #[target_feature(enable = "neon")] @@ -970,6 +1452,30 @@ pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t { simd_le(a, transmute(b)) } +/// Compare less than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, 
assert_instr(cmgt))] +pub unsafe fn vclezd_s64(a: i64) -> u64 { + transmute(vclez_s64(transmute(a))) +} + +/// Floating-point compare less than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmle))] +pub unsafe fn vclezs_f32(a: f32) -> u32 { + simd_extract(vclez_f32(vdup_n_f32(a)), 0) +} + +/// Floating-point compare less than or equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmle))] +pub unsafe fn vclezd_f64(a: f64) -> u64 { + simd_extract(vclez_f64(vdup_n_f64(a)), 0) +} + /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] @@ -1078,6 +1584,30 @@ pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t { simd_lt(a, transmute(b)) } +/// Compare less than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshr))] +pub unsafe fn vcltzd_s64(a: i64) -> u64 { + transmute(vcltz_s64(transmute(a))) +} + +/// Floating-point compare less than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmlt))] +pub unsafe fn vcltzs_f32(a: f32) -> u32 { + simd_extract(vcltz_f32(vdup_n_f32(a)), 0) +} + +/// Floating-point compare less than zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmlt))] +pub unsafe fn vcltzd_f64(a: f64) -> u64 { + simd_extract(vcltz_f64(vdup_n_f64(a)), 0) +} + /// Floating-point absolute compare greater than #[inline] #[target_feature(enable = "neon")] @@ -1104,6 +1634,32 @@ pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcagtq_f64_(a, b) } +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.i32.f32")] + fn vcagts_f32_(a: f32, b: f32) -> u32; + } + vcagts_f32_(a, b) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcagtd_f64(a: f64, b: f64) -> u64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.i64.f64")] + fn vcagtd_f64_(a: f64, b: f64) -> u64; + } + vcagtd_f64_(a, b) +} + /// Floating-point absolute compare greater than or equal #[inline] #[target_feature(enable = "neon")] @@ -1130,6 +1686,32 @@ pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcageq_f64_(a, b) } +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.i32.f32")] + fn vcages_f32_(a: f32, b: f32) -> u32; + } + vcages_f32_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcaged_f64(a: f64, b: f64) -> u64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.i64.f64")] + fn vcaged_f64_(a: f64, b: f64) -> u64; + } + vcaged_f64_(a, b) +} + /// Floating-point absolute compare less than #[inline] #[target_feature(enable = "neon")] @@ -1146,6 +1728,22 @@ pub 
unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcagtq_f64(b, a) } +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcalts_f32(a: f32, b: f32) -> u32 { + vcagts_f32(b, a) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 { + vcagtd_f64(b, a) +} + /// Floating-point absolute compare less than or equal #[inline] #[target_feature(enable = "neon")] @@ -1162,6 +1760,22 @@ pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcageq_f64(b, a) } +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 { + vcages_f32(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 { + vcaged_f64(b, a) +} + /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] @@ -2110,6 +2724,14 @@ pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { vcvtx_f32_f64_(a) } +/// Floating-point convert to lower precision narrow, rounding to odd +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtxn))] +pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { + simd_extract(vcvtx_f32_f64(vdupq_n_f64(a)), 0) +} + /// Floating-point convert to lower precision narrow, rounding to odd #[inline] #[target_feature(enable = "neon")] @@ -3892,6 +4514,14 @@ pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t { simd_neg(a) } +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(neg))] +pub unsafe fn vnegd_s64(a: i64) -> i64 { + -a +} + /// Negate #[inline] #[target_feature(enable = "neon")] @@ -3934,6 +4564,38 @@ pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { vqnegq_s64_(a) } +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegb_s8(a: i8) -> i8 { + simd_extract(vqneg_s8(vdup_n_s8(a)), 0) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegh_s16(a: i16) -> i16 { + simd_extract(vqneg_s16(vdup_n_s16(a)), 0) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegs_s32(a: i32) -> i32 { + simd_extract(vqneg_s32(vdup_n_s32(a)), 0) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegd_s64(a: i64) -> i64 { + simd_extract(vqneg_s64(vdup_n_s64(a)), 0) +} + /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -4026,6 +4688,60 @@ pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { vqsubd_s64_(a, b) } +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x: int8x8_t = vraddhn_s16(b, c); + simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] 
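Unlike the plain `vnegd_s64` above, the `vqneg*` scalar forms saturate instead of wrapping, which only matters at the type's minimum value. A sketch of that edge case, assuming an aarch64 target:

    unsafe {
        // -i8::MIN does not fit in i8; SQNEG clamps to i8::MAX.
        assert_eq!(vqnegb_s8(i8::MIN), i8::MAX);
        assert_eq!(vqnegb_s8(-5), 5); // all other inputs negate exactly
    }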
+#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x: int16x4_t = vraddhn_s32(b, c); + simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let x: int32x2_t = vraddhn_s64(b, c); + simd_shuffle4!(a, x, [0, 1, 2, 3]) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let x: uint8x8_t = vraddhn_u16(b, c); + simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let x: uint16x4_t = vraddhn_u32(b, c); + simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(raddhn2))] +pub unsafe fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let x: uint32x2_t = vraddhn_u64(b, c); + simd_shuffle4!(a, x, [0, 1, 2, 3]) +} + /// Reverse bit order #[inline] #[target_feature(enable = "neon")] @@ -4214,6 +4930,19 @@ pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { vrndnq_f64_(a) } +/// Floating-point round to integral, to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frintn))] +pub unsafe fn vrndns_f32(a: f32) -> f32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.roundeven.f32")] + fn vrndns_f32_(a: f32) -> f32; + } + vrndns_f32_(a) +} + /// Floating-point round to integral, toward minus infinity #[inline] #[target_feature(enable = "neon")] @@ -6896,41 +7625,112 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_sub(a, b) } -/// Signed Add Long across Vector +/// Subtract #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(saddlv))] -pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")] - fn vaddlv_s16_(a: int16x4_t) -> i32; - } - vaddlv_s16_(a) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 { + a - b } -/// Signed Add Long across Vector +/// Subtract #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(saddlv))] -pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")] - fn vaddlvq_s16_(a: int16x8_t) -> i32; - } - vaddlvq_s16_(a) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 { + a - b } -/// Signed Add Long across Vector +/// Add #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(saddlp))] -pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")] - fn 
vaddlv_s32_(a: int32x2_t) -> i64; +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { + a + b +} + +/// Add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { + a + b +} + +/// Floating-point add across vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(faddv))] +pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f32.v2f32")] + fn vaddv_f32_(a: float32x2_t) -> f32; + } + vaddv_f32_(a) +} + +/// Floating-point add across vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(faddv))] +pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f32.v4f32")] + fn vaddvq_f32_(a: float32x4_t) -> f32; + } + vaddvq_f32_(a) +} + +/// Floating-point add across vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(faddv))] +pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f64.v2f64")] + fn vaddvq_f64_(a: float64x2_t) -> f64; + } + vaddvq_f64_(a) +} + +/// Signed Add Long across Vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(saddlv))] +pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")] + fn vaddlv_s16_(a: int16x4_t) -> i32; + } + vaddlv_s16_(a) +} + +/// Signed Add Long across Vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(saddlv))] +pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")] + fn vaddlvq_s16_(a: int16x8_t) -> i32; + } + vaddlvq_s16_(a) +} + +/// Signed Add Long across Vector +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(saddlp))] +pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")] + fn vaddlv_s32_(a: int32x2_t) -> i64; } vaddlv_s32_(a) } @@ -7126,9309 +7926,11995 @@ pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { simd_sub(d, f) } -/// Maximum (vector) +/// Bit clear and exclusive OR #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmax))] -pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v1f64")] - fn vmax_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; - } - vmax_f64_(a, b) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_xor(a, vbicq_s8(b, c)) } -/// Maximum (vector) +/// Bit clear and exclusive OR #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmax))] -pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_s16(a: 
int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_xor(a, vbicq_s16(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_xor(a, vbicq_s32(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + simd_xor(a, vbicq_s64(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_xor(a, vbicq_u8(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_xor(a, vbicq_u16(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_xor(a, vbicq_u32(b, c)) +} + +/// Bit clear and exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + simd_xor(a, vbicq_u64(b, c)) +} + +/// Floating-point complex add +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f64")] - fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32")] + fn vcadd_rot270_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - vmaxq_f64_(a, b) + vcadd_rot270_f32_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point complex add #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")] - fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32")] + fn vcaddq_rot270_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; } - vmaxnm_f64_(a, b) + vcaddq_rot270_f32_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point complex add #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")] - fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64")] + fn vcaddq_rot270_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - vmaxnmq_f64_(a, b) + vcaddq_rot270_f64_(a, b) } -/// Floating-point Maximum Number Pairwise (vector). +/// Floating-point complex add #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")] - fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32")] + fn vcadd_rot90_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - vpmaxnm_f32_(a, b) + vcadd_rot90_f32_(a, b) } -/// Floating-point Maximum Number Pairwise (vector). +/// Floating-point complex add #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")] - fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32")] + fn vcaddq_rot90_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; } - vpmaxnmq_f64_(a, b) + vcaddq_rot90_f32_(a, b) } -/// Floating-point Maximum Number Pairwise (vector). 
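The `vcadd_rot*` intrinsics treat each even/odd lane pair as a complex number `(re, im)`: per the Arm FCADD definition, `rot90` should compute `a + i*b`, i.e. `(a.re - b.im, a.im + b.re)`, and `rot270` computes `a - i*b`. A sketch of the implied lane arithmetic, assuming an aarch64 target with the `fcma` feature:

    unsafe {
        let a = vld1_f32([1.0f32, 2.0].as_ptr()); // 1 + 2i
        let b = vld1_f32([3.0f32, 4.0].as_ptr()); // 3 + 4i
        let r = vcadd_rot90_f32(a, b);            // expect (1 - 4, 2 + 3)
        assert_eq!(vget_lane_f32::<0>(r), -3.0);
        assert_eq!(vget_lane_f32::<1>(r), 5.0);
    }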
+/// Floating-point complex add #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")] - fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64")] + fn vcaddq_rot90_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - vpmaxnmq_f32_(a, b) + vcaddq_rot90_f64_(a, b) } -/// Minimum (vector) +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmin))] -pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v1f64")] - fn vmin_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32")] + fn vcmla_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } - vmin_f64_(a, b) + vcmla_f32_(a, b, c) } -/// Minimum (vector) +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmin))] -pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f64")] - fn vminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32")] + fn vcmlaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; } - vminq_f64_(a, b) + vcmlaq_f32_(a, b, c) } -/// Floating-point Minimun Number (vector) +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] -pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")] - fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64")] + fn vcmlaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; } - vminnm_f64_(a, b) + vcmlaq_f64_(a, b, c) } -/// Floating-point Minimun Number (vector) +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] -pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, 
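Each `vcmla*` rotation contributes half of a complex product, so chaining the `rot0` and `rot90` forms yields a full complex multiply-accumulate, `acc + a*b` over `(re, im)` lane pairs. A sketch of that composition, with values chosen so that `(1+2i)*(3+4i) = -5+10i`, assuming an aarch64 target with the `fcma` feature:

    unsafe {
        let acc = vdup_n_f32(0.0);
        let a = vld1_f32([1.0f32, 2.0].as_ptr()); // 1 + 2i
        let b = vld1_f32([3.0f32, 4.0].as_ptr()); // 3 + 4i
        let full = vcmla_rot90_f32(vcmla_f32(acc, a, b), a, b);
        assert_eq!(vget_lane_f32::<0>(full), -5.0);
        assert_eq!(vget_lane_f32::<1>(full), 10.0);
    }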
assert_instr(fcmla))] +pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")] - fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32")] + fn vcmla_rot90_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } - vminnmq_f64_(a, b) + vcmla_rot90_f32_(a, b, c) } -/// Floating-point Minimum Number Pairwise (vector). +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] -pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")] - fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32")] + fn vcmlaq_rot90_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; } - vpminnm_f32_(a, b) + vcmlaq_rot90_f32_(a, b, c) } -/// Floating-point Minimum Number Pairwise (vector). +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] -pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")] - fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64")] + fn vcmlaq_rot90_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; } - vpminnmq_f64_(a, b) + vcmlaq_rot90_f64_(a, b, c) } -/// Floating-point Minimum Number Pairwise (vector). 
+/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] -pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")] - fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32")] + fn vcmla_rot180_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } - vpminnmq_f32_(a, b) + vcmla_rot180_f32_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull))] -pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqdmull_s16(a, b), 0) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32")] + fn vcmlaq_rot180_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + vcmlaq_rot180_f32_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull))] -pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 { +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulls.scalar")] - fn vqdmulls_s32_(a: i32, b: i32) -> i64; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64")] + fn vcmlaq_rot180_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; } - vqdmulls_s32_(a, b) + vcmlaq_rot180_f64_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] -pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); - vqdmull_s16(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32")] + fn vcmla_rot270_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + vcmla_rot270_f32_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] -pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let a: int32x2_t 
= simd_shuffle2!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); - vqdmull_s32(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32")] + fn vcmlaq_rot270_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + vcmlaq_rot270_f32_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] -pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { - let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = vdup_n_s16(b); - vqdmull_s16(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64")] + fn vcmlaq_rot270_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + vcmlaq_rot270_f64_(a, b, c) } -/// Signed saturating doubling multiply long +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] -pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { - let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); - let b: int32x2_t = vdup_n_s32(b); - vqdmull_s32(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vcmla_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + static_assert!(LANE : i32 where LANE == 0); + let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); + vcmla_f32(a, b, c) } -/// Vector saturating doubling long multiply by scalar +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, N = 4))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { - static_assert_imm3!(N); - let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); - vqdmull_s16(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vcmla_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { + static_assert_imm1!(LANE); + let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); + vcmla_f32(a, b, c) } -/// Vector saturating doubling long multiply by scalar +/// Floating-point complex multiply accumulate #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { - static_assert_imm2!(N); - let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); - vqdmull_s32(a, b) +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vcmlaq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { + 
static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
-    static_assert_imm2!(N);
-    let b: i16 = simd_extract(b, N as u32);
-    vqdmullh_s16(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
-    static_assert_imm3!(N);
-    let b: i16 = simd_extract(b, N as u32);
-    vqdmullh_s16(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot90_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot90_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
-    static_assert_imm1!(N);
-    let b: i32 = simd_extract(b, N as u32);
-    vqdmulls_s32(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot90_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot90_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
-    static_assert_imm2!(N);
-    let b: i32 = simd_extract(b, N as u32);
-    vqdmulls_s32(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot90_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot90_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot90_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot90_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot180_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot180_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    static_assert_imm3!(N);
-    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot180_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot180_f32(a, b, c)
}

-/// Signed saturating doubling multiply long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    static_assert_imm2!(N);
-    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot180_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot180_f32(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
-pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_s16(b, c))
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot180_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot180_f32(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
-pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_s32(b, c))
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot270_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot270_f32(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
-pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_n_s16(b, c))
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot270_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    let c: float32x2_t = simd_shuffle2!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot270_f32(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
-pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_n_s32(b, c))
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot270_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot270_f32(a, b, c)
}

-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Floating-point complex multiply accumulate
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
-    static_assert_imm3!(N);
-    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+pub unsafe fn vcmlaq_rot270_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    let c: float32x4_t = simd_shuffle4!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmlaq_rot270_f32(a, b, c)
}
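The shuffles above only select which complex element of `c` feeds the instruction; the rotation itself is applied by the underlying `fcmla`. A scalar model of the four rotations on one (re, im) pair, illustrative only and not part of the generated file (`Complex` and `fcmla_step` are invented names; the semantics follow the Armv8.3 FCMLA definition as I understand it):

#[derive(Clone, Copy)]
struct Complex { re: f32, im: f32 }

// Each rotation accumulates one partial product of b * c onto acc;
// rot 0 followed by rot 90 forms a full complex multiply-accumulate.
fn fcmla_step(acc: Complex, b: Complex, c: Complex, rot: u32) -> Complex {
    match rot {
        0 => Complex { re: acc.re + b.re * c.re, im: acc.im + b.re * c.im },
        90 => Complex { re: acc.re - b.im * c.im, im: acc.im + b.im * c.re },
        180 => Complex { re: acc.re - b.re * c.re, im: acc.im - b.re * c.im },
        270 => Complex { re: acc.re + b.im * c.im, im: acc.im - b.im * c.re },
        _ => unreachable!("FCMLA only defines rotations 0/90/180/270"),
    }
}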
-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_imm2!(N);
-    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8")]
+        fn vdot_s32_(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
+    }
+    vdot_s32_(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8")]
+        fn vdotq_s32_(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    vdotq_s32_(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    static_assert_imm3!(N);
-    vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot))]
+pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.udot.v2i32.v8i8")]
+        fn vdot_u32_(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t;
+    }
+    vdot_u32_(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot))]
+pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.udot.v4i32.v16i8")]
+        fn vdotq_u32_(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
+    }
+    vdotq_u32_(a, b, c)
}

-/// Signed saturating doubling multiply-add long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    static_assert_imm2!(N);
-    vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+pub unsafe fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    let c: int8x8_t = simd_shuffle8!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdot_s32(a, b, c)
}

-/// Signed saturating doubling multiply-subtract long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
-pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_high_s16(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    let c: int8x8_t = simd_shuffle8!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdot_s32(a, b, c)
}

-/// Signed saturating doubling multiply-subtract long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
-pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_high_s32(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    let c: int8x16_t = simd_shuffle16!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdotq_s32(a, b, c)
}

-/// Signed saturating doubling multiply-subtract long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
-pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_high_n_s16(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    let c: int8x16_t = simd_shuffle16!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdotq_s32(a, b, c)
}

-/// Signed saturating doubling multiply-subtract long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
-pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_high_n_s32(b, c))
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    let c: uint8x8_t = simd_shuffle8!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdot_u32(a, b, c)
}

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
-    static_assert_imm3!(N);
-    vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+pub unsafe fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
+    static_assert_imm2!(LANE);
+    let c: uint8x8_t = simd_shuffle8!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdot_u32(a, b, c)
}

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_imm2!(N);
-    vqsubq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+pub unsafe fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
+    static_assert_imm1!(LANE);
+    let c: uint8x16_t = simd_shuffle16!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdotq_u32(a, b, c)
}

-/// Signed saturating doubling multiply-subtract long
+/// Dot product arithmetic
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+pub unsafe fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    let c: uint8x16_t = simd_shuffle16!(c, c, [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+    vdotq_u32(a, b, c)
}
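The `_lane` shuffles above broadcast one group of four bytes before the instruction. For review, a scalar model of what each 32-bit lane of `sdot`/`udot` accumulates; illustrative only, not part of the generated file:

// One output lane of vdot_s32: acc plus the sum of four i8 x i8 products.
// Intermediate math is done in i32, so the byte products cannot overflow.
fn sdot_lane_model(acc: i32, b: [i8; 4], c: [i8; 4]) -> i32 {
    let mut sum = acc;
    for k in 0..4 {
        sum = sum.wrapping_add(b[k] as i32 * c[k] as i32);
    }
    sum
}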
-/// Signed saturating doubling multiply-subtract long
+/// Maximum (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    static_assert_imm3!(N);
-    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+#[cfg_attr(test, assert_instr(fmax))]
+pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v1f64")]
+        fn vmax_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    vmax_f64_(a, b)
}

-/// Signed saturating doubling multiply-subtract long
+/// Maximum (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+#[cfg_attr(test, assert_instr(fmax))]
+pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f64")]
+        fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vmaxq_f64_(a, b)
}

-/// Signed saturating doubling multiply-subtract long
+/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    static_assert_imm2!(N);
-    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
+        fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    vmaxnm_f64_(a, b)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract(vqdmulh_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
+        fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vmaxnmq_f64_(a, b)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point maximum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    simd_extract(vqdmulh_s32(a, b), 0)
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")]
+        fn vmaxnmv_f32_(a: float32x2_t) -> f32;
+    }
+    vmaxnmv_f32_(a)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point maximum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_imm2!(N);
-    let b: i16 = simd_extract(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32")]
+        fn vmaxnmvq_f32_(a: float32x4_t) -> f32;
+    }
+    vmaxnmvq_f32_(a)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point maximum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_imm3!(N);
-    let b: i16 = simd_extract(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")]
+        fn vmaxnmvq_f64_(a: float64x2_t) -> f64;
+    }
+    vmaxnmvq_f64_(a)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point Maximum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_imm1!(N);
-    let b: i32 = simd_extract(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
+        fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    vpmaxnm_f32_(a, b)
}

-/// Signed saturating doubling multiply returning high half
+/// Floating-point Maximum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_imm2!(N);
-    let b: i32 = simd_extract(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
+        fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vpmaxnmq_f64_(a, b)
}
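The `fmax` and `fmaxnm` pairs above differ only in NaN handling: `fmaxnm` implements the IEEE 754-2008 maxNum rule, where a quiet NaN loses to a number, while `fmax` propagates the NaN. A scalar sketch of the maxNum rule, illustrative only (signalling NaNs and the ±0.0 ordering detail are ignored here):

// maxNum: if exactly one operand is NaN, return the other one.
fn fmaxnm_model(a: f64, b: f64) -> f64 {
    if a.is_nan() { b } else if b.is_nan() { a } else if a >= b { a } else { b }
}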
-/// Saturating extract narrow
+/// Floating-point Maximum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
-pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
-    simd_extract(vqmovn_s16(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
+        fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    vpmaxnmq_f32_(a, b)
}

-/// Saturating extract narrow
+/// Floating-point maximum number pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
-pub unsafe fn vqmovns_s32(a: i32) -> i16 {
-    simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")]
+        fn vpmaxnms_f32_(a: float32x2_t) -> f32;
+    }
+    vpmaxnms_f32_(a)
}

-/// Saturating extract narrow
+/// Floating-point maximum number pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
-pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
-    simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")]
+        fn vpmaxnmqd_f64_(a: float64x2_t) -> f64;
+    }
+    vpmaxnmqd_f64_(a)
}

-/// Saturating extract narrow
+/// Floating-point maximum pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
-pub unsafe fn vqmovns_u32(a: u32) -> u16 {
-    simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32")]
+        fn vpmaxs_f32_(a: float32x2_t) -> f32;
+    }
+    vpmaxs_f32_(a)
}

-/// Saturating extract narrow
+/// Floating-point maximum pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
-pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 {
    #[allow(improper_ctypes)]
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
-        fn vqmovnd_s64_(a: i64) -> i32;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64")]
+        fn vpmaxqd_f64_(a: float64x2_t) -> f64;
    }
-    vqmovnd_s64_(a)
+    vpmaxqd_f64_(a)
}

-/// Saturating extract narrow
+/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
-pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
+#[cfg_attr(test, assert_instr(fmin))]
+pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
-        fn vqmovnd_u64_(a: u64) -> u32;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v1f64")]
+        fn vmin_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
    }
-    vqmovnd_u64_(a)
+    vmin_f64_(a, b)
}

-/// Signed saturating extract narrow
+/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
-pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    simd_shuffle16!(a, vqmovn_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(fmin))]
+pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f64")]
+        fn vminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vminq_f64_(a, b)
}

-/// Signed saturating extract narrow
+/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
-pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    simd_shuffle8!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(fminnm))]
+pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
+        fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    vminnm_f64_(a, b)
}

-/// Signed saturating extract narrow
+/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
-pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    simd_shuffle4!(a, vqmovn_s64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(fminnm))]
+pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
+        fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vminnmq_f64_(a, b)
}

-/// Signed saturating extract narrow
+/// Floating-point minimum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
-pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    simd_shuffle16!(a, vqmovn_u16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(fminnmv))]
+pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")]
+        fn vminnmv_f32_(a: float32x2_t) -> f32;
+    }
+    vminnmv_f32_(a)
}

-/// Signed saturating extract narrow
+/// Floating-point minimum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
-pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    simd_shuffle8!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(fminnmv))]
+pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32")]
+        fn vminnmvq_f32_(a: float32x4_t) -> f32;
+    }
+    vminnmvq_f32_(a)
}

-/// Signed saturating extract narrow
+/// Floating-point minimum number across vector
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
-pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    simd_shuffle4!(a, vqmovn_u64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(fminnmv))]
+pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")]
+        fn vminnmvq_f64_(a: float64x2_t) -> f64;
+    }
+    vminnmvq_f64_(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
-pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
-    simd_extract(vqmovun_s16(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(sshll2))]
+pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
+    let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmovl_s8(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
-pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
-    simd_extract(vqmovun_s32(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(sshll2))]
+pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    vmovl_s16(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
-pub unsafe fn vqmovund_s64(a: i64) -> u32 {
-    simd_extract(vqmovun_s64(vdupq_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(sshll2))]
+pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    vmovl_s32(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
-pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    simd_shuffle16!(a, vqmovun_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(ushll2))]
+pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
+    let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmovl_u8(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
-pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    simd_shuffle8!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(ushll2))]
+pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
+    let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    vmovl_u16(a)
}

-/// Signed saturating extract unsigned narrow
+/// Vector move
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
-pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    simd_shuffle4!(a, vqmovun_s64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(ushll2))]
+pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
+    let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    vmovl_u32(a)
}

-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point add pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
-    simd_extract(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v4f32")]
+        fn vpaddq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    vpaddq_f32_(a, b)
}
-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point add pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
-    simd_extract(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
-}
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v2f64")]
+        fn vpaddq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vpaddq_f64_(a, b)
+}

-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point add pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_imm2!(LANE);
-    vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 {
+    let a1: f32 = simd_extract(a, 0);
+    let a2: f32 = simd_extract(a, 1);
+    a1 + a2
}

-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point add pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_imm3!(LANE);
-    vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 {
+    let a1: f64 = simd_extract(a, 0);
+    let a2: f64 = simd_extract(a, 1);
+    a1 + a2
}

-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point Minimum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_imm1!(LANE);
-    vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
+        fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    vpminnm_f32_(a, b)
}

-/// Signed saturating rounding doubling multiply returning high half
+/// Floating-point Minimum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_imm2!(LANE);
-    vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
+        fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vpminnmq_f64_(a, b)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Floating-point Minimum Number Pairwise (vector).
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
-    vqaddh_s16(a, vqrdmulhh_s16(b, c))
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
+        fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    vpminnmq_f32_(a, b)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Floating-point minimum number pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
-    vqadds_s32(a, vqrdmulhs_s32(b, c))
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")]
+        fn vpminnms_f32_(a: float32x2_t) -> f32;
+    }
+    vpminnms_f32_(a)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Floating-point minimum number pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
-    static_assert_imm2!(LANE);
-    vqaddh_s16(a, vqrdmulhh_lane_s16::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")]
+        fn vpminnmqd_f64_(a: float64x2_t) -> f64;
+    }
+    vpminnmqd_f64_(a)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Floating-point minimum pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
-    static_assert_imm3!(LANE);
-    vqaddh_s16(a, vqrdmulhh_laneq_s16::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminv.f32.v2f32")]
+        fn vpmins_f32_(a: float32x2_t) -> f32;
+    }
+    vpmins_f32_(a)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Floating-point minimum pairwise
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
-    static_assert_imm1!(LANE);
-    vqadds_s32(a, vqrdmulhs_lane_s32::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminv.f64.v2f64")]
+        fn vpminqd_f64_(a: float64x2_t) -> f64;
+    }
+    vpminqd_f64_(a)
}

-/// Signed saturating rounding doubling multiply accumulate returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
-    static_assert_imm2!(LANE);
-    vqadds_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqdmull_s16(a, b), 0)
}

-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
-    vqsubh_s16(a, vqrdmulhh_s16(b, c))
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulls.scalar")]
+        fn vqdmulls_s32_(a: i32, b: i32) -> i64;
+    }
+    vqdmulls_s32_(a, b)
}

-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
-pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
-    vqsubs_s32(a, vqrdmulhs_s32(b, c))
+#[cfg_attr(test, assert_instr(sqdmull2))]
+pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+    vqdmull_s16(a, b)
}

-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
-    static_assert_imm2!(LANE);
-    vqsubh_s16(a, vqrdmulhh_lane_s16::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(sqdmull2))]
+pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+    vqdmull_s32(a, b)
}

-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
-    static_assert_imm3!(LANE);
-    vqsubh_s16(a, vqrdmulhh_laneq_s16::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(sqdmull2))]
+pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmull_s16(a, b)
}

-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
-    static_assert_imm1!(LANE);
-    vqsubs_s32(a, vqrdmulhs_lane_s32::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(sqdmull2))]
+pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmull_s32(a, b)
}
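All the `vqdmull*` intrinsics compute `2 * a * b` widened to the double-width type, saturating on the single case that can overflow (`a == b == MIN`). A scalar model for the 16-bit variant, illustrative only, not part of the generated file:

// sqdmull on one lane: widen, double, and clamp to the i32 range.
// Only i16::MIN * i16::MIN doubled exceeds i32::MAX, by exactly one.
fn sqdmull_model(a: i16, b: i16) -> i32 {
    (2 * a as i64 * b as i64).clamp(i32::MIN as i64, i32::MAX as i64) as i32
}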
-/// Signed saturating rounding doubling multiply subtract returning high half
+/// Vector saturating doubling long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
-    static_assert_imm2!(LANE);
-    vqsubs_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(N);
+    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
}

-/// Signed saturating rounding shift left
+/// Vector saturating doubling long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
-pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
-        fn vqrshls_s32_(a: i32, b: i32) -> i32;
-    }
-    vqrshls_s32_(a, b)
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(N);
+    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
}

-/// Signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
-pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
-        fn vqrshld_s64_(a: i64, b: i64) -> i64;
-    }
-    vqrshld_s64_(a, b)
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
+    static_assert_imm2!(N);
+    let b: i16 = simd_extract(b, N as u32);
+    vqdmullh_s16(a, b)
}

-/// Signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
-pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
-    let a: int8x8_t = vdup_n_s8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract(vqrshl_s8(a, b), 0)
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
+    static_assert_imm3!(N);
+    let b: i16 = simd_extract(b, N as u32);
+    vqdmullh_s16(a, b)
}

-/// Signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
-pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract(vqrshl_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
+    static_assert_imm1!(N);
+    let b: i32 = simd_extract(b, N as u32);
+    vqdmulls_s32(a, b)
}

-/// Unsigned signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
-pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
-        fn vqrshls_u32_(a: u32, b: i32) -> u32;
-    }
-    vqrshls_u32_(a, b)
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
+    static_assert_imm2!(N);
+    let b: i32 = simd_extract(b, N as u32);
+    vqdmulls_s32(a, b)
}

-/// Unsigned signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
-pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
-        fn vqrshld_u64_(a: u64, b: i64) -> u64;
-    }
-    vqrshld_u64_(a, b)
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
}

-/// Unsigned signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
-pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
-    let a: uint8x8_t = vdup_n_u8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract(vqrshl_u8(a, b), 0)
+#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
}

-/// Unsigned signed saturating rounding shift left
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
-pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
-    let a: uint16x4_t = vdup_n_u16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract(vqrshl_u16(a, b), 0)
+#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(N);
+    let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
}

-/// Signed saturating rounded shift right narrow
+/// Signed saturating doubling multiply long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    let a: int16x8_t = vdupq_n_s16(a);
-    simd_extract(vqrshrn_n_s16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(N);
+    let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
}
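The `_lane`/`_laneq` variants differ from the plain forms only in that one operand is a single element selected by the const `N` and splatted (or extracted) before the same widening multiply; the `_high_` variants additionally take the upper half of `a`. A vector-level model of the lane selection, illustrative only:

// vqdmull_lane-style behaviour on plain arrays: splat b[lane], then
// apply the scalar sqdmull model per lane.
fn sqdmull_lane_model(a: [i16; 4], b: [i16; 4], lane: usize) -> [i32; 4] {
    let mut out = [0i32; 4];
    for i in 0..4 {
        out[i] = (2 * a[i] as i64 * b[lane] as i64)
            .clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    }
    out
}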
-/// Signed saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    let a: int32x4_t = vdupq_n_s32(a);
-    simd_extract(vqrshrn_n_s32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_high_s16(b, c))
}

-/// Signed saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    let a: int64x2_t = vdupq_n_s64(a);
-    simd_extract(vqrshrn_n_s64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_high_s32(b, c))
}

-/// Signed saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_high_n_s16(b, c))
}

-/// Signed saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_high_n_s32(b, c))
}

-/// Signed saturating rounded shift right narrow
+/// Vector widening saturating doubling multiply accumulate with scalar
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(N);
+    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Vector widening saturating doubling multiply accumulate with scalar
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    let a: uint16x8_t = vdupq_n_u16(a);
-    simd_extract(vqrshrn_n_u16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(N);
+    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    let a: uint32x4_t = vdupq_n_u32(a);
-    simd_extract(vqrshrn_n_u32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    let a: uint64x2_t = vdupq_n_u64(a);
-    simd_extract(vqrshrn_n_u64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(N);
+    vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(N);
+    vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
}

-/// Unsigned saturating rounded shift right narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
+    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+    vqadds_s32(a, simd_extract(x, 0))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    let a: int16x8_t = vdupq_n_s16(a);
-    simd_extract(vqrshrun_n_s16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
+    let x: int64x2_t = vqdmull_s32(vdup_n_s32(b), vdup_n_s32(c));
+    vqaddd_s64(a, simd_extract(x, 0))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    let a: int32x4_t = vdupq_n_s32(a);
-    simd_extract(vqrshrun_n_s32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+    static_assert_imm2!(LANE);
+    vqdmlalh_s16(a, b, simd_extract(c, LANE as u32))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    let a: int64x2_t = vdupq_n_s64(a);
-    simd_extract(vqrshrun_n_s64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+    static_assert_imm3!(LANE);
+    vqdmlalh_s16(a, b, simd_extract(c, LANE as u32))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+    static_assert_imm1!(LANE);
+    vqdmlals_s32(a, b, simd_extract(c, LANE as u32))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-add long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+    static_assert_imm2!(LANE);
+    vqdmlals_s32(a, b, simd_extract(c, LANE as u32))
}

-/// Signed saturating rounded shift right unsigned narrow
+/// Signed saturating doubling multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
+pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_high_s16(b, c))
}
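The scalar `vqdmlalh_s16`/`vqdmlals_s32` forms chain two saturations: the doubled product saturates first (inside `vqdmull`), then the accumulate (`vqadds_s32`/`vqaddd_s64`) saturates again; the `vqdmlsl*` family is identical with a saturating subtract. A scalar model, illustrative only:

// vqdmlalh_s16 modelled on plain integers: saturating doubled product,
// then a saturating add onto the accumulator.
fn sqdmlal_scalar_model(acc: i32, b: i16, c: i16) -> i32 {
    let doubled = 2i32.saturating_mul(b as i32).saturating_mul(c as i32);
    acc.saturating_add(doubled)
}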
= "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")] - fn vqshld_s64_(a: i64, b: i64) -> i64; - } - vqshld_s64_(a, b) +#[cfg_attr(test, assert_instr(sqdmlsl2))] +pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_high_s32(b, c)) } -/// Signed saturating shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { - let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(sqdmlsl2))] +pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_high_n_s16(b, c)) } -/// Signed saturating shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 { - let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(sqdmlsl2))] +pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_high_n_s32(b, c)) } -/// Signed saturating shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { - let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(N); + vqsubq_s32(a, vqdmull_laneq_s16::(b, c)) } -/// Unsigned saturating shift left +/// Vector widening saturating doubling multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] -pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")] - fn vqshld_u64_(a: u64, b: i64) -> u64; - } - vqshld_u64_(a, b) +#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(N); + vqsubq_s64(a, vqdmull_laneq_s32::(b, c)) } -/// Unsigned saturating shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] -pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 { - let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + vqsubq_s32(a, vqdmull_high_lane_s16::(b, c)) } -/// Unsigned saturating shift left +/// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] -pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 { - let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b)); - simd_extract(c, 0) 
-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
-pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
-    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
-    simd_extract(c, 0)
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(N);
+    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
 }

-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
-pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
-    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
-    simd_extract(c, 0)
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
-    static_assert_imm3!(N);
-    simd_extract(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(N);
+    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
 }

-/// Signed saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
-    static_assert_imm4!(N);
-    simd_extract(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
+    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+    vqsubs_s32(a, simd_extract(x, 0))
 }

-/// Signed saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
-    static_assert_imm5!(N);
-    simd_extract(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull))]
+pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
+    let x: int64x2_t = vqdmull_s32(vdup_n_s32(b), vdup_n_s32(c));
+    vqsubd_s64(a, simd_extract(x, 0))
 }

-/// Signed saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert_imm6!(N);
-    simd_extract(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+    static_assert_imm2!(LANE);
+    vqdmlslh_s16(a, b, simd_extract(c, LANE as u32))
 }

-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
-    static_assert_imm3!(N);
-    simd_extract(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+    static_assert_imm3!(LANE);
+    vqdmlslh_s16(a, b, simd_extract(c, LANE as u32))
 }

-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
-    static_assert_imm4!(N);
-    simd_extract(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+    static_assert_imm1!(LANE);
+    vqdmlsls_s32(a, b, simd_extract(c, LANE as u32))
 }

-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
-    static_assert_imm5!(N);
-    simd_extract(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+    static_assert_imm2!(LANE);
+    vqdmlsls_s32(a, b, simd_extract(c, LANE as u32))
 }

-/// Unsigned saturating shift left
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert_imm6!(N);
-    simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqdmulh_s16(a, b), 0)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
-        fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
-    }
-    vqshrnd_n_s64_(a, N)
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    simd_extract(vqdmulh_s32(a, b), 0)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
+    static_assert_imm2!(N);
+    let b: i16 = simd_extract(b, N as u32);
+    vqdmulhh_s16(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
+    static_assert_imm3!(N);
+    let b: i16 = simd_extract(b, N as u32);
+    vqdmulhh_s16(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
+    static_assert_imm1!(N);
+    let b: i32 = simd_extract(b, N as u32);
+    vqdmulhs_s32(a, b)
 }

-/// Signed saturating shift right narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
+    static_assert_imm2!(N);
+    let b: i32 = simd_extract(b, N as u32);
+    vqdmulhs_s32(a, b)
 }

-/// Signed saturating shift right narrow
+/// Vector saturating doubling multiply high by scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32)))
 }

-/// Unsigned saturating shift right narrow
+/// Vector saturating doubling multiply high by scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
-        fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
-    }
-    vqshrnd_n_u64_(a, N)
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32)))
 }

-/// Unsigned saturating shift right narrow
+/// Vector saturating doubling multiply high by scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32)))
 }

-/// Unsigned saturating shift right narrow
+/// Vector saturating doubling multiply high by scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32)))
 }

-/// Unsigned saturating shift right narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(sqxtn))]
+pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
+    simd_extract(vqmovn_s16(vdupq_n_s16(a)), 0)
 }

-/// Unsigned saturating shift right narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(sqxtn))]
+pub unsafe fn vqmovns_s32(a: i32) -> i16 {
+    simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
 }

-/// Unsigned saturating shift right narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(uqxtn))]
+pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
+    simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0)
 }

-/// Signed saturating shift right unsigned narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_extract(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(uqxtn))]
+pub unsafe fn vqmovns_u32(a: u32) -> u16 {
+    simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
 }

-/// Signed saturating shift right unsigned narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_extract(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(sqxtn))]
+pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
+        fn vqmovnd_s64_(a: i64) -> i32;
+    }
+    vqmovnd_s64_(a)
 }
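For reference, sqdmulh keeps the high half of the doubled product, saturating the widened intermediate 2 * a * b. A small illustrative check, not part of the generated diff, assuming the nightly std::arch::aarch64 exposure:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqdmulh_scalar() {
        use std::arch::aarch64::*;
        // (2 * 16384 * 16384) >> 16 == 8192.
        assert_eq!(vqdmulhh_s16(16384, 16384), 8192);
        // 2 * (-32768)^2 saturates the i32 intermediate, so the high half
        // comes out as i16::MAX rather than a wrapped negative value.
        assert_eq!(vqdmulhh_s16(i16::MIN, i16::MIN), i16::MAX);
    }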
-/// Signed saturating shift right unsigned narrow
+/// Saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_extract(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(uqxtn))]
+pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
+        fn vqmovnd_u64_(a: u64) -> u32;
+    }
+    vqmovnd_u64_(a)
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(sqxtn2))]
+pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    simd_shuffle16!(a, vqmovn_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(sqxtn2))]
+pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    simd_shuffle8!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(sqxtn2))]
+pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    simd_shuffle4!(a, vqmovn_s64(b), [0, 1, 2, 3])
 }

-/// Calculates the square root of each lane.
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
-pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(uqxtn2))]
+pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    simd_shuffle16!(a, vqmovn_u16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Calculates the square root of each lane.
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
-pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(uqxtn2))]
+pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    simd_shuffle8!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Calculates the square root of each lane.
+/// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
-pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(uqxtn2))]
+pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    simd_shuffle4!(a, vqmovn_u64(b), [0, 1, 2, 3])
 }

-/// Calculates the square root of each lane.
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
-pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(sqxtun))]
+pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
+    simd_extract(vqmovun_s16(vdupq_n_s16(a)), 0)
 }

-/// Reciprocal square-root estimate.
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v1f64")]
-        fn vrsqrte_f64_(a: float64x1_t) -> float64x1_t;
-    }
-    vrsqrte_f64_(a)
+#[cfg_attr(test, assert_instr(sqxtun))]
+pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
+    simd_extract(vqmovun_s32(vdupq_n_s32(a)), 0)
 }

-/// Reciprocal square-root estimate.
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f64")]
-        fn vrsqrteq_f64_(a: float64x2_t) -> float64x2_t;
-    }
-    vrsqrteq_f64_(a)
+#[cfg_attr(test, assert_instr(sqxtun))]
+pub unsafe fn vqmovund_s64(a: i64) -> u32 {
+    simd_extract(vqmovun_s64(vdupq_n_s64(a)), 0)
 }

-/// Reciprocal estimate.
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
-pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v1f64")]
-        fn vrecpe_f64_(a: float64x1_t) -> float64x1_t;
-    }
-    vrecpe_f64_(a)
+#[cfg_attr(test, assert_instr(sqxtun2))]
+pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    simd_shuffle16!(a, vqmovun_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
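The sqxtun family narrows a signed value into the unsigned range, clamping at both ends. Illustrative only, not part of the generated diff; assumes the nightly std::arch::aarch64 exposure:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqmovun_scalar() {
        use std::arch::aarch64::*;
        assert_eq!(vqmovunh_s16(-5), 0u8);       // negatives clamp to zero
        assert_eq!(vqmovunh_s16(300), u8::MAX);  // over-range clamps to the max
        assert_eq!(vqmovund_s64(i64::MAX), u32::MAX);
    }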
-/// Reciprocal estimate.
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
-pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f64")]
-        fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
-    }
-    vrecpeq_f64_(a)
+#[cfg_attr(test, assert_instr(sqxtun2))]
+pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    simd_shuffle8!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating extract unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqxtun2))]
+pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    simd_shuffle4!(a, vqmovun_s64(b), [0, 1, 2, 3])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
+    simd_extract(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
+    simd_extract(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
+    static_assert_imm2!(LANE);
+    vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
+    static_assert_imm3!(LANE);
+    vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
+    static_assert_imm1!(LANE);
+    vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
 }
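sqrdmulh differs from sqdmulh only in rounding the doubled product before taking the high half. A sketch of the difference, illustrative only and not part of the generated diff:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqrdmulh_scalar() {
        use std::arch::aarch64::*;
        // Truncating high half: (2 * 1 * 16384) >> 16 == 0.
        assert_eq!(vqdmulhh_s16(1, 16384), 0);
        // Rounding high half: (2 * 1 * 16384 + 0x8000) >> 16 == 1.
        assert_eq!(vqrdmulhh_s16(1, 16384), 1);
    }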
-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
+    static_assert_imm2!(LANE);
+    vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
+    vqaddh_s16(a, vqrdmulhh_s16(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
+    vqadds_s32(a, vqrdmulhs_s32(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
+    static_assert_imm2!(LANE);
+    vqaddh_s16(a, vqrdmulhh_lane_s16::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
+    static_assert_imm3!(LANE);
+    vqaddh_s16(a, vqrdmulhh_laneq_s16::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
+    static_assert_imm1!(LANE);
+    vqadds_s32(a, vqrdmulhs_lane_s32::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+    static_assert_imm2!(LANE);
+    vqadds_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
+    vqsubh_s16(a, vqrdmulhh_s16(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
+    vqsubs_s32(a, vqrdmulhs_s32(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
+    static_assert_imm2!(LANE);
+    vqsubh_s16(a, vqrdmulhh_lane_s16::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
+    static_assert_imm3!(LANE);
+    vqsubh_s16(a, vqrdmulhh_laneq_s16::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
+    static_assert_imm1!(LANE);
+    vqsubs_s32(a, vqrdmulhs_lane_s32::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding doubling multiply subtract returning high half
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+    static_assert_imm2!(LANE);
+    vqsubs_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
+        fn vqrshls_s32_(a: i32, b: i32) -> i32;
+    }
+    vqrshls_s32_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
+        fn vqrshld_s64_(a: i64, b: i64) -> i64;
+    }
+    vqrshld_s64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract(vqrshl_s8(a, b), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqrshl_s16(a, b), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
+        fn vqrshls_u32_(a: u32, b: i32) -> u32;
+    }
+    vqrshls_u32_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
+        fn vqrshld_u64_(a: u64, b: i64) -> u64;
+    }
+    vqrshld_u64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract(vqrshl_u8(a, b), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqrshl_u16(a, b), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract(vqrshrn_n_s16::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract(vqrshrn_n_s32::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract(vqrshrn_n_s64::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: uint16x8_t = vdupq_n_u16(a);
+    simd_extract(vqrshrn_n_u16::<N>(a), 0)
 }
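These rounded narrowing shifts add the rounding constant 1 << (N - 1) before shifting, then saturate into the narrower type. Illustrative only, not part of the generated diff; assumes the nightly std::arch::aarch64 exposure:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqrshrn_scalar() {
        use std::arch::aarch64::*;
        // (65535 + 128) >> 8 == 256, which still fits in i16.
        assert_eq!(vqrshrns_n_s32::<8>(65535), 256i16);
        // A result that no longer fits saturates to i16::MAX.
        assert_eq!(vqrshrns_n_s32::<8>(i32::MAX), i16::MAX);
    }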
-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: uint32x4_t = vdupq_n_u32(a);
+    simd_extract(vqrshrn_n_u32::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: uint64x2_t = vdupq_n_u64(a);
+    simd_extract(vqrshrn_n_u64::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract(vqrshrun_n_s16::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract(vqrshrun_n_s32::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract(vqrshrun_n_s64::<N>(a), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl))]
+pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")]
+        fn vqshld_s64_(a: i64, b: i64) -> i64;
+    }
+    vqshld_s64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl))]
+pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
+    let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
+    simd_extract(c, 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl))]
+pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
+    let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
+    simd_extract(c, 0)
 }
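The unsigned-narrow variants (sqrshrun) additionally clamp negative inputs to zero. Illustrative only, not part of the generated diff:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqrshrun_scalar() {
        use std::arch::aarch64::*;
        // (1000 + 8) >> 4 == 63, delivered as u32.
        assert_eq!(vqrshrund_n_s64::<4>(1000), 63u32);
        // Negative inputs clamp to zero in the unsigned result.
        assert_eq!(vqrshrund_n_s64::<4>(-1), 0u32);
    }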
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl))]
+pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
+    let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
+    simd_extract(c, 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl))]
+pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")]
+        fn vqshld_u64_(a: u64, b: i64) -> u64;
+    }
+    vqshld_u64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl))]
+pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
+    let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
+    simd_extract(c, 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl))]
+pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
+    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
+    simd_extract(c, 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl))]
+pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
+    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
+    simd_extract(c, 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
+    static_assert_imm3!(N);
+    simd_extract(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
+    static_assert_imm4!(N);
+    simd_extract(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
+    static_assert_imm5!(N);
+    simd_extract(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert_imm6!(N);
+    simd_extract(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
+    static_assert_imm3!(N);
+    simd_extract(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
+    static_assert_imm4!(N);
+    simd_extract(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
+    static_assert_imm5!(N);
+    simd_extract(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert_imm6!(N);
+    simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
+        fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
+    }
+    vqshrnd_n_s64_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
+        fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
+    }
+    vqshrnd_n_u64_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
 }
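The immediate shift-left forms saturate on overflow rather than wrapping. Illustrative only, not part of the generated diff:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vqshl_n_scalar() {
        use std::arch::aarch64::*;
        assert_eq!(vqshlb_n_s8::<2>(16), 64i8);      // 16 << 2 fits in i8
        assert_eq!(vqshlb_n_s8::<2>(100), i8::MAX);  // 100 << 2 saturates
    }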
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_extract(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
 }
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_extract(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
 }
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_extract(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
 }
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16!(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
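// --- Editorial sketch, not part of the patch: sqshrun (vqshrun*) shifts a
// signed value right and saturates it into the *unsigned* narrow range, so
// negative inputs clamp to zero. Plain-Rust model of one lane (hypothetical
// name, assuming N in 1..=8):
fn sqshrun_lane_s16(a: i16, n: u32) -> u8 {
    (a >> n).clamp(0, u8::MAX as i16) as u8
}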
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating accumulate of signed value
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 {
+    simd_extract(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0)
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating accumulate of signed value
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 {
+    simd_extract(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0)
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating accumulate of signed value
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.usqadd.i32")]
+        fn vsqadds_u32_(a: u32, b: i32) -> u32;
+    }
+    vsqadds_u32_(a, b)
 }
-/// Vector reinterpret cast operation
+/// Unsigned saturating accumulate of signed value
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.usqadd.i64")]
+        fn vsqaddd_u64_(a: u64, b: i64) -> u64;
+    }
+    vsqaddd_u64_(a, b)
 }
-/// Vector reinterpret cast operation
+/// Calculates the square root of each lane.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
+    simd_fsqrt(a)
 }
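// --- Editorial sketch, not part of the patch: the vsqadd*_u* scalars above
// map to usqadd, which adds a *signed* addend to an unsigned accumulator with
// unsigned saturation, clamping at both 0 and the type maximum. Plain-Rust
// model for the 8-bit case (hypothetical name):
fn usqadd_u8(a: u8, b: i8) -> u8 {
    (a as i16 + b as i16).clamp(0, u8::MAX as i16) as u8
}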
-/// Vector reinterpret cast operation
+/// Calculates the square root of each lane.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
+    simd_fsqrt(a)
 }
-/// Vector reinterpret cast operation
+/// Calculates the square root of each lane.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
+    simd_fsqrt(a)
 }
-/// Vector reinterpret cast operation
+/// Calculates the square root of each lane.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
+    simd_fsqrt(a)
 }
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v1f64")]
+        fn vrsqrte_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrsqrte_f64_(a)
 }
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f64")]
+        fn vrsqrteq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrsqrteq_f64_(a)
 }
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.f32")]
+        fn vrsqrtes_f32_(a: f32) -> f32;
+    }
+    vrsqrtes_f32_(a)
 }
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.f64")]
+        fn vrsqrted_f64_(a: f64) -> f64;
+    }
+    vrsqrted_f64_(a)
 }
-/// Vector reinterpret cast operation
+/// Floating-point reciprocal square root step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub unsafe fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v1f64")]
+        fn vrsqrts_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    vrsqrts_f64_(a, b)
 }
-/// Signed rounding shift left
+/// Floating-point reciprocal square root step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshl))]
-pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub unsafe fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
-        fn vrshld_s64_(a: i64, b: i64) -> i64;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v2f64")]
+        fn vrsqrtsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    vrshld_s64_(a, b)
+    vrsqrtsq_f64_(a, b)
 }
-/// Unsigned rounding shift left
+/// Floating-point reciprocal square root step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshl))]
-pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
     #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
-        fn vrshld_u64_(a: u64, b: i64) -> u64;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.f32")]
+        fn vrsqrtss_f32_(a: f32, b: f32) -> f32;
     }
-    vrshld_u64_(a, b)
+    vrsqrtss_f32_(a, b)
 }
-/// Signed rounding shift right
+/// Floating-point reciprocal square root step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshld_s64(a, -N as i64)
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.f64")]
+        fn vrsqrtsd_f64_(a: f64, b: f64) -> f64;
+    }
+    vrsqrtsd_f64_(a, b)
 }
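// --- Editorial sketch, not part of the patch: frsqrte alone yields only a
// rough estimate of 1/sqrt(x); frsqrts supplies the Newton-Raphson correction
// term, since vrsqrtss_f32(a, b) computes (3.0 - a * b) / 2.0. Typical use of
// the scalar pair added above (assuming they are reachable through
// core::arch::aarch64 on an aarch64 target with NEON enabled):
#[cfg(target_arch = "aarch64")]
unsafe fn rsqrt_refined_f32(x: f32) -> f32 {
    use core::arch::aarch64::{vrsqrtes_f32, vrsqrtss_f32};
    let y = vrsqrtes_f32(x); // initial low-precision estimate
    y * vrsqrtss_f32(x * y, y) // one step: y * (3 - x*y*y) / 2
}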
-/// Unsigned rounding shift right
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshld_u64(a, -N as i64)
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v1f64")]
+        fn vrecpe_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrecpe_f64_(a)
 }
-/// Rounding shift right narrow
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f64")]
+        fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrecpeq_f64_(a)
 }
-/// Rounding shift right narrow
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecpes_f32(a: f32) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.f32")]
+        fn vrecpes_f32_(a: f32) -> f32;
+    }
+    vrecpes_f32_(a)
 }
-/// Rounding shift right narrow
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecped_f64(a: f64) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.f64")]
+        fn vrecped_f64_(a: f64) -> f64;
+    }
+    vrecped_f64_(a)
 }
-/// Rounding shift right narrow
+/// Floating-point reciprocal step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(frecps))]
+pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v1f64")]
+        fn vrecps_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    vrecps_f64_(a, b)
 }
-/// Rounding shift right narrow
+/// Floating-point reciprocal step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(frecps))]
+pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v2f64")]
+        fn vrecpsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    vrecpsq_f64_(a, b)
 }
-/// Rounding shift right narrow
+/// Floating-point reciprocal step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(frecps))]
+pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.f32")]
+        fn vrecpss_f32_(a: f32, b: f32) -> f32;
+    }
+    vrecpss_f32_(a, b)
 }
-/// Signed rounding shift right and accumulate.
+/// Floating-point reciprocal step
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    let b: i64 = vrshrd_n_s64::<N>(b);
-    a + b
+#[cfg_attr(test, assert_instr(frecps))]
+pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.f64")]
+        fn vrecpsd_f64_(a: f64, b: f64) -> f64;
+    }
+    vrecpsd_f64_(a, b)
 }
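// --- Editorial sketch, not part of the patch: the same estimate-then-refine
// pattern works for reciprocals; vrecpss_f32(a, b) computes 2.0 - a * b, so
// y * vrecpss_f32(x, y) is one Newton-Raphson step toward 1/x (again assuming
// the scalar intrinsics above are reachable through core::arch::aarch64):
#[cfg(target_arch = "aarch64")]
unsafe fn recip_refined_f32(x: f32) -> f32 {
    use core::arch::aarch64::{vrecpes_f32, vrecpss_f32};
    let y = vrecpes_f32(x); // initial low-precision estimate of 1/x
    y * vrecpss_f32(x, y) // one step: y * (2 - x*y)
}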
-/// Ungisned rounding shift right and accumulate.
+/// Floating-point reciprocal exponent
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    let b: u64 = vrshrd_n_u64::<N>(b);
-    a + b
+#[cfg_attr(test, assert_instr(frecpx))]
+pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpx.f32")]
+        fn vrecpxs_f32_(a: f32) -> f32;
+    }
+    vrecpxs_f32_(a)
 }
-/// Insert vector element from another vector element
+/// Floating-point reciprocal exponent
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
-    static_assert!(LANE : i32 where LANE == 0);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(test, assert_instr(frecpx))]
+pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpx.f64")]
+        fn vrecpxd_f64_(a: f64) -> f64;
+    }
+    vrecpxd_f64_(a)
 }
-/// Insert vector element from another vector element
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
-    static_assert_imm1!(LANE);
-    simd_insert(b, LANE as u32, a)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
+    transmute(a)
 }
-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshl))]
-pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
-    transmute(vshl_s64(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+    transmute(a)
 }
-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushl))]
-pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
-    transmute(vshl_u64(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    let b: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vshll_n_s8::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    let b: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
-    vshll_n_s16::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    let b: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
-    vshll_n_s32::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    let b: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vshll_n_u8::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    let b: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
-    vshll_n_u16::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    let b: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
-    vshll_n_u32::<N>(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shuffle16!(a, vshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shuffle8!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shuffle4!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn1))]
-pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 2, 6])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(trn2))]
-pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle4!(a, b, [1, 5, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    transmute(a)
 }
-/// Transpose vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle4!(a, b, [0, 4, 1, 5])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1))]
-pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle2!(a, b, [0, 2])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle4!(a, b, [2, 6, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle4!(a, b, [2, 6, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle4!(a, b, [2, 6, 3, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    transmute(a)
 }
-/// Zip vectors
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle2!(a, b, [1, 3])
+#[cfg_attr(test, assert_instr(srshl))]
+pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
+        fn vrshld_s64_(a: i64, b: i64) -> i64;
+    }
+    vrshld_s64_(a, b)
 }
-/// Zip vectors
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip2))]
-pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle4!(a, b, [2, 6, 3, 7])
+#[cfg_attr(test, assert_instr(urshl))]
+pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
+        fn vrshld_u64_(a: u64, b: i64) -> u64;
+    }
+    vrshld_u64_(a, b)
 }
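// --- Editorial sketch, not part of the patch: vrshrd_n_s64 below is
// implemented as vrshld_s64(a, -N), i.e. srshl with a negated shift count
// performs a *rounding* right shift. Plain-Rust model (hypothetical name):
// add the rounding constant 2^(N-1) in wider precision, then shift.
fn rshr_s64(a: i64, n: u32) -> i64 {
    debug_assert!((1..=64).contains(&n));
    (((a as i128) + (1i128 << (n - 1))) >> n) as i64
}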
vrshld_u64_(a, b) } -/// Zip vectors +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshld_s64(a, -N as i64) } -/// Zip vectors +/// Unsigned rounding shift right #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshld_u64(a, -N as i64) } -/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16!(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4!(a, b, [2, 6, 3, 7]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3]) } -/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16!(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
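+// A usage sketch for the `vrshrn_high_n_*` intrinsics above (values are
+// illustrative, not from the source): the low half of the result is `a`
+// unchanged and the rounded, narrowed `b` fills the upper half:
+//   let lo = vdup_n_u8(0);                    // uint8x8_t low half
+//   let wide = vdupq_n_u16(256);              // uint16x8_t to narrow
+//   let r = vrshrn_high_n_u16::<4>(lo, wide); // upper lanes == (256 + 8) >> 4 == 16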
-/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Zip vectors +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4!(a, b, [2, 6, 3, 7]) +#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3]) } -/// Zip vectors +/// Signed rounding shift right and accumulate. #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + let b: i64 = vrshrd_n_s64::<N>(b); + a + b } -/// Unzip vectors +/// Unsigned rounding shift right and accumulate. #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N : i32 where N >= 1 && N <= 64); + let b: u64 = vrshrd_n_u64::<N>(b); + a + b } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x: int8x8_t = vrsubhn_s16(b, c); + simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x: int16x4_t = vrsubhn_s32(b, c); + simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let x: int32x2_t = vrsubhn_s64(b, c); + simd_shuffle4!(a, x, [0, 1, 2, 3]) } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let x: uint8x8_t = vrsubhn_u16(b, c);
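+    // `x` now holds the rounded high halves of `b - c`; the shuffle below
+    // concatenates `a` (kept as the low half) with `x` (the new high half).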
+ simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let x: uint16x4_t = vrsubhn_u32(b, c); + simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Unzip vectors +/// Rounding subtract returning high narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +#[cfg_attr(test, assert_instr(rsubhn2))] +pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let x: uint32x2_t = vrsubhn_u64(b, c); + simd_shuffle4!(a, x, [0, 1, 2, 3]) } -/// Unzip vectors +/// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t { + static_assert!(LANE : i32 where LANE == 0); + simd_insert(b, LANE as u32, a) } -/// Unzip vectors +/// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t { + static_assert_imm1!(LANE); + simd_insert(b, LANE as u32, a) } -/// Unzip vectors +/// Signed Shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(sshl))] +pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 { + transmute(vshl_s64(transmute(a), transmute(b))) } -/// Unzip vectors +/// Unsigned Shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(ushl))] +pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 { + transmute(vshl_u64(transmute(a), transmute(b))) } -/// Unzip vectors +/// Signed shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + let b: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vshll_n_s8::<N>(b) }
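+// A sketch of the `vshll_high_n_*` pattern above (illustrative values only):
+// the upper half of the input is widened and then shifted left by N:
+//   let v = vdupq_n_s8(3);           // int8x16_t
+//   let w = vshll_high_n_s8::<2>(v); // each i16 lane == 3 << 2 == 12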
-/// Unzip vectors +/// Signed shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + let b: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + vshll_n_s16::<N>(b) } -/// Unzip vectors +/// Signed shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + let b: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + vshll_n_s32::<N>(b) } -/// Unzip vectors +/// Unsigned shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + let b: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vshll_n_u8::<N>(b) } -/// Unzip vectors +/// Unsigned shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + let b: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + vshll_n_u16::<N>(b) } -/// Unzip vectors +/// Unsigned shift left long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + let b: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); + vshll_n_u32::<N>(b) } -/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16!(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
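+// Unlike the `vrshrn_high_n_*` family earlier in this diff, these
+// `vshrn_high_n_*` variants truncate instead of rounding: (illustrative) a
+// lane value of 7 shifted right by 2 narrows to 1 here, but to 2 with the
+// rounding form, since (7 + 2) >> 2 == 2.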
-/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp1))] -pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4!(a, b, [0, 2, 4, 6]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3]) } -/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16!(a, vshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1))] -pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2!(a, b, [0, 2]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Unzip vectors +/// Shift right narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + 
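+    // trn1 keeps the even-numbered lanes of both inputs, pairwise:
+    // result[2*i] = a[2*i], result[2*i + 1] = b[2*i] (shuffle indices >= 8 pick from `b`).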
simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: 
poly16x8_t) -> poly16x8_t { - simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uzp2))] -pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4!(a, b, [1, 3, 5, 7]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(trn1))] +pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [0, 4, 2, 6]) } -/// Unzip vectors +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip2))] -pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2!(a, b, [1, 3]) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unsigned Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabal))] -pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 
12, 13, 14, 15]); - let e: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: uint8x8_t = vabd_u8(d, e); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle2!(a, b, [0, 2]) } -/// Unsigned Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabal))] -pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); - let e: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); - let f: uint16x4_t = vabd_u16(d, e); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } -/// Unsigned Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabal))] -pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); - let e: uint32x2_t = simd_shuffle2!(c, c, [2, 3]); - let f: uint32x2_t = vabd_u32(d, e); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) } -/// Signed Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sabal))] -pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: int8x8_t = vabd_s8(d, e); - let f: uint8x8_t = simd_cast(f); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) } -/// Signed Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sabal))] -pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); - let e: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); - let f: int16x4_t = vabd_s16(d, e); - let f: uint16x4_t = simd_cast(f); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } -/// Signed Absolute difference and Accumulate Long +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sabal))] -pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]); - let e: int32x2_t = simd_shuffle2!(c, c, [2, 3]); - let f: int32x2_t = vabd_s32(d, e); - let f: uint32x2_t = simd_cast(f); - simd_add(a, simd_cast(f)) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) } -/// Singned saturating Absolute value +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqabs))] -pub unsafe fn vqabs_s64(a: 
int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v1i64")] - fn vqabs_s64_(a: int64x1_t) -> int64x1_t; - } - vqabs_s64_(a) +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } -/// Singned saturating Absolute value +/// Transpose vectors #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqabs))] -pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i64")] - fn vqabsq_s64_(a: int64x2_t) -> int64x2_t; +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(trn2))] +pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [1, 5, 3, 7]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Transpose vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [0, 4, 1, 5]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip 
vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [2, 6, 3, 7]) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + 
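+    // zip2 interleaves the upper halves; for two-lane vectors that is just
+    // [a[1], b[1]], e.g. (illustrative) [1.0, 2.0] and [3.0, 4.0] give [2.0, 4.0].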
simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp1))] +pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [0, 2, 4, 6]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip1))] +pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle2!(a, b, [0, 2]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 
13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uzp2))] +pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, b, [1, 3, 5, 7]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(zip2))] +pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle2!(a, b, [1, 3]) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uabal))] +pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { + let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let f: uint8x8_t = vabd_u8(d, e); + simd_add(a, simd_cast(f)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uabal))] +pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: 
uint16x8_t) -> uint32x4_t { + let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let e: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); + let f: uint16x4_t = vabd_u16(d, e); + simd_add(a, simd_cast(f)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uabal))] +pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { + let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let e: uint32x2_t = simd_shuffle2!(c, c, [2, 3]); + let f: uint32x2_t = vabd_u32(d, e); + simd_add(a, simd_cast(f)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sabal))] +pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { + let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let f: int8x8_t = vabd_s8(d, e); + let f: uint8x8_t = simd_cast(f); + simd_add(a, simd_cast(f)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sabal))] +pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let e: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); + let f: int16x4_t = vabd_s16(d, e); + let f: uint16x4_t = simd_cast(f); + simd_add(a, simd_cast(f)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sabal))] +pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let e: int32x2_t = simd_shuffle2!(c, c, [2, 3]); + let f: int32x2_t = vabd_s32(d, e); + let f: uint32x2_t = simd_cast(f); + simd_add(a, simd_cast(f)) +} + +/// Signed saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v1i64")] + fn vqabs_s64_(a: int64x1_t) -> int64x1_t; + } + vqabs_s64_(a) +} + +/// Signed saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i64")] + fn vqabsq_s64_(a: int64x2_t) -> int64x2_t; + } + vqabsq_s64_(a) +} + +/// Signed saturating absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabsb_s8(a: i8) -> i8 { + simd_extract(vqabs_s8(vdup_n_s8(a)), 0) +} + +/// Signed saturating absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabsh_s16(a: i16) -> i16 { + simd_extract(vqabs_s16(vdup_n_s16(a)), 0) +} + +/// Signed saturating absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabss_s32(a: i32) -> i32 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.i32")] + fn vqabss_s32_(a: i32) -> i32; + } + vqabss_s32_(a) +} + +/// Signed 
saturating absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqabs))] +pub unsafe fn vqabsd_s64(a: i64) -> i64 { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.i64")] + fn vqabsd_s64_(a: i64) -> i64; + } + vqabsd_s64_(a) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let c: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(veor3q_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let c: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(veor3q_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let c: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(veor3q_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let c: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(veor3q_s64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let c: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(veor3q_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor3q_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let c: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(veor3q_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_veor3q_u32() {
+        let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03);
+        let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00);
+        let c: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00);
+        let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03);
+        let r: u32x4 = transmute(veor3q_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_veor3q_u64() {
+        let a: u64x2 = u64x2::new(0x00, 0x01);
+        let b: u64x2 = u64x2::new(0x00, 0x00);
+        let c: u64x2 = u64x2::new(0x00, 0x00);
+        let e: u64x2 = u64x2::new(0x00, 0x01);
+        let r: u64x2 = transmute(veor3q_u64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabd_f64() {
+        let a: f64 = 1.0;
+        let b: f64 = 9.0;
+        let e: f64 = 8.0;
+        let r: f64 = transmute(vabd_f64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdq_f64() {
+        let a: f64x2 = f64x2::new(1.0, 2.0);
+        let b: f64x2 = f64x2::new(9.0, 3.0);
+        let e: f64x2 = f64x2::new(8.0, 1.0);
+        let r: f64x2 = transmute(vabdq_f64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabds_f32() {
+        let a: f32 = 1.0;
+        let b: f32 = 9.0;
+        let e: f32 = 8.0;
+        let r: f32 = transmute(vabds_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdd_f64() {
+        let a: f64 = 1.0;
+        let b: f64 = 9.0;
+        let e: f64 = 8.0;
+        let r: f64 = transmute(vabdd_f64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
+        let e: u16x8 = u16x8::new(1, 0, 1, 2, 3, 4, 5, 6);
+        let r: u16x8 = transmute(vabdl_high_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 8, 9, 11, 12);
+        let b: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
+        let e: u32x4 = u32x4::new(2, 1, 1, 2);
+        let r: u32x4 = transmute(vabdl_high_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(10, 10, 10, 10);
+        let e: u64x2 = u64x2::new(7, 6);
+        let r: u64x2 = transmute(vabdl_high_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
+        let e: i16x8 = i16x8::new(1, 0, 1, 2, 3, 4, 5, 6);
+        let r: i16x8 = transmute(vabdl_high_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
+        let b: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
+        let e: i32x4 = i32x4::new(1, 0, 1, 2);
+        let r: i32x4 = transmute(vabdl_high_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
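Reviewer note (illustrative, not part of the patch): the expectation vectors in the `vabdl_high_*` tests above (and the s32 case that follows) are derived by taking only the high half of each input, widening, and forming the absolute difference. A scalar sketch with a hypothetical helper `abdl_high_u8`:

```rust
// Reference derivation for the vabdl_high_u8 expectations: high lanes
// only, widened to u16, |a - b| per lane.
fn abdl_high_u8(a: [u8; 16], b: [u8; 16]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for i in 0..8 {
        let (x, y) = (a[8 + i] as u16, b[8 + i] as u16); // high half
        r[i] = if x > y { x - y } else { y - x }; // absolute difference
    }
    r
}

fn main() {
    let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let b = [10; 16];
    // Matches the `e` vector in test_vabdl_high_u8.
    assert_eq!(abdl_high_u8(a, b), [1, 0, 1, 2, 3, 4, 5, 6]);
}
```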
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vabdl_high_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(10, 10, 10, 10);
+        let e: i64x2 = i64x2::new(7, 6);
+        let r: i64x2 = transmute(vabdl_high_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceq_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let b: u64x1 = u64x1::new(0);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: u64x1 = u64x1::new(0);
+        let b: u64x1 = u64x1::new(0);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceqq_u64() {
+        let a: u64x2 = u64x2::new(0, 0x01);
+        let b: u64x2 = u64x2::new(0, 0x01);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: u64x2 = u64x2::new(0, 0);
+        let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+        let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceq_s64() {
+        let a: i64x1 = i64x1::new(-9223372036854775808);
+        let b: i64x1 = i64x1::new(-9223372036854775808);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i64x1 = i64x1::new(-9223372036854775808);
+        let b: i64x1 = i64x1::new(-9223372036854775808);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceqq_s64() {
+        let a: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+        let b: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808);
+        let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+        let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceq_p64() {
+        let a: i64x1 = i64x1::new(-9223372036854775808);
+        let b: i64x1 = i64x1::new(-9223372036854775808);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i64x1 = i64x1::new(-9223372036854775808);
+        let b: i64x1 = i64x1::new(-9223372036854775808);
+        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vceqq_p64() {
+        let a: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+        let b: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808);
+        let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+        let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
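Reviewer note (illustrative, not part of the patch): every comparison test in this module, here and below, relies on the NEON mask convention that a true lane compares to all-ones and a false lane to all-zeros. A scalar sketch with a hypothetical helper `ceqd`:

```rust
// Scalar reference for the vceqd_*-style comparisons: an all-ones or
// all-zeros mask rather than a boolean.
fn ceqd(a: i64, b: i64) -> u64 {
    if a == b { u64::MAX } else { 0 }
}

fn main() {
    // Equal operands yield the all-ones mask the tests spell out in hex.
    assert_eq!(ceqd(i64::MIN, i64::MIN), 0xFF_FF_FF_FF_FF_FF_FF_FF);
    // Unequal operands yield all-zeros.
    assert_eq!(ceqd(1, 2), 0);
}
```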
#[simd_test(enable = "neon")] + unsafe fn test_vceq_f64() { + let a: f64 = 1.2; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f64() { + let a: f64x2 = f64x2::new(1.2, 3.4); + let b: f64x2 = f64x2::new(1.2, 3.4); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqd_s64() { + let a: i64 = 1; + let b: i64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vceqd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqd_u64() { + let a: u64 = 1; + let b: u64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vceqd_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqs_f32() { + let a: f32 = 1.; + let b: f32 = 2.; + let e: u32 = 0; + let r: u32 = transmute(vceqs_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqd_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: u64 = 0; + let r: u64 = transmute(vceqd_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); + let r: u16x4 = transmute(vceqz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vceqzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x00); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceqz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); + let r: u32x4 = transmute(vceqzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vceqz_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + 
unsafe fn test_vceqz_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_p64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vceqz_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_p64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); + let r: u16x4 = transmute(vceqz_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vceqzq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceqz_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); + let r: u32x4 = transmute(vceqzq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceqz_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u64() { + let a: u64x2 = u64x2::new(0, 0x00); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_f32() { + let a: f32x2 = f32x2::new(0.0, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceqz_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_f32() { + let a: f32x4 = f32x4::new(0.0, 1.2, 3.4, 5.6); + let e: 
u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); + let r: u32x4 = transmute(vceqzq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_f64() { + let a: f64 = 0.0; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceqz_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_f64() { + let a: f64x2 = f64x2::new(0.0, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vceqzq_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzd_s64() { + let a: i64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vceqzd_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzd_u64() { + let a: u64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vceqzd_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzs_f32() { + let a: f32 = 1.; + let e: u32 = 0; + let r: u32 = transmute(vceqzs_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzd_f64() { + let a: f64 = 1.; + let e: u64 = 0; + let r: u64 = transmute(vceqzd_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vtst_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vtstq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_p64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vtst_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_p64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vtstq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vtst_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_u64() { + let a: u64x2 = u64x2::new(0, 0x00); + let b: u64x2 = u64x2::new(0, 0x00); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vtstq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstd_s64() { + let a: i64 = 0; + let b: i64 = 0; + let e: u64 = 0; + let r: u64 = transmute(vtstd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstd_u64() { + let a: u64 = 0; + let b: u64 = 0; + let e: u64 = 0; + let r: u64 = transmute(vtstd_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe 
fn test_vuqadd_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 1, 2, 3, 4); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 1, 2, 3, 4); + let e: i8x8 = i8x8::new(2, 4, 6, 8, 2, 4, 6, 8); + let r: i8x8 = transmute(vuqadd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqadd_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(2, 4, 6, 8); + let r: i16x4 = transmute(vuqadd_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqadd_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: i32x2 = i32x2::new(2, 4); + let r: i32x2 = transmute(vuqadd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqadd_s64() { + let a: i64x1 = i64x1::new(1); + let b: u64x1 = u64x1::new(1); + let e: i64x1 = i64x1::new(2); + let r: i64x1 = transmute(vuqadd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4); + let e: i8x16 = i8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8); + let r: i8x16 = transmute(vuqaddq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); + let e: i16x8 = i16x8::new(2, 4, 6, 8, 2, 4, 6, 8); + let r: i16x8 = transmute(vuqaddq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vuqaddq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: u64x2 = u64x2::new(1, 2); + let e: i64x2 = i64x2::new(2, 4); + let r: i64x2 = transmute(vuqaddq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqadds_s32() { + let a: i32 = 1; + let b: u32 = 1; + let e: i32 = 2; + let r: i32 = transmute(vuqadds_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddd_s64() { + let a: i64 = 1; + let b: u64 = 1; + let e: i64 = 2; + let r: i64 = transmute(vuqaddd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddb_s8() { + let a: i8 = 1; + let b: u8 = 2; + let e: i8 = 3; + let r: i8 = transmute(vuqaddb_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuqaddh_s16() { + let a: i16 = 1; + let b: u16 = 2; + let e: i16 = 3; + let r: i16 = transmute(vuqaddh_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabs_f64() { + let a: f64 = -0.1; + let e: f64 = 0.1; + let r: f64 = transmute(vabs_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabsq_f64() { + let a: f64x2 = f64x2::new(-0.1, -2.2); + let e: f64x2 = f64x2::new(0.1, 2.2); + let r: f64x2 = transmute(vabsq_f64(transmute(a))); + assert_eq!(r, 
e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f64() { + let a: f64 = 1.2; + let b: f64 = 0.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f64() { + let a: f64x2 = f64x2::new(1.2, 2.3); + let b: f64x2 = f64x2::new(0.1, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtd_s64() { + let a: i64 = 1; + let b: i64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vcgtd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtd_u64() { + let a: u64 = 1; + let b: u64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vcgtd_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgts_f32() { + let a: f32 = 1.; + let b: f32 = 2.; + let e: u32 = 0; + let r: u32 = transmute(vcgts_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtd_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: u64 = 0; + let r: u64 = transmute(vcgtd_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let b: i64x2 = i64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let b: u64x2 = 
u64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f64() { + let a: f64 = 0.1; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f64() { + let a: f64x2 = f64x2::new(0.1, 1.2); + let b: f64x2 = f64x2::new(1.2, 2.3); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltd_s64() { + let a: i64 = 2; + let b: i64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vcltd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltd_u64() { + let a: u64 = 2; + let b: u64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vcltd_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclts_f32() { + let a: f32 = 2.; + let b: f32 = 1.; + let e: u32 = 0; + let r: u32 = transmute(vclts_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltd_f64() { + let a: f64 = 2.; + let b: f64 = 1.; + let e: u64 = 0; + let r: u64 = transmute(vcltd_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let b: i64x2 = i64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcged_s64() { + let a: i64 = 1; + let b: i64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vcged_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcged_u64() { + let a: u64 = 1; + let b: u64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vcged_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcges_f32() { + let a: f32 = 1.; + let b: f32 = 2.; + let e: u32 = 0; + let r: u32 = transmute(vcges_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcged_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: u64 = 0; + let r: u64 = transmute(vcged_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = 
transmute(vcleq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f64() { + let a: f64 = 0.1; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f64() { + let a: f64x2 = f64x2::new(0.1, 1.2); + let b: f64x2 = f64x2::new(1.2, 2.3); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcled_s64() { + let a: i64 = 2; + let b: i64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vcled_s64(transmute(a), transmute(b))); + assert_eq!(r, e); } - vqabsq_s64_(a) -} -#[cfg(test)] -mod test { - use super::*; - use crate::core_arch::simd::*; - use std::mem::transmute; - use stdarch_test::simd_test; + #[simd_test(enable = "neon")] + unsafe fn test_vcled_u64() { + let a: u64 = 2; + let b: u64 = 1; + let e: u64 = 0; + let r: u64 = transmute(vcled_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } #[simd_test(enable = "neon")] - unsafe fn test_vabd_f64() { - let a: f64 = 1.0; - let b: f64 = 9.0; - let e: f64 = 8.0; - let r: f64 = transmute(vabd_f64(transmute(a), transmute(b))); + unsafe fn test_vcles_f32() { + let a: f32 = 2.; + let b: f32 = 1.; + let e: u32 = 0; + let r: u32 = transmute(vcles_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_f64() { - let a: f64x2 = f64x2::new(1.0, 2.0); - let b: f64x2 = f64x2::new(9.0, 3.0); - let e: f64x2 = f64x2::new(8.0, 1.0); - let r: f64x2 = transmute(vabdq_f64(transmute(a), transmute(b))); + unsafe fn test_vcled_f64() { + let a: f64 = 2.; + let b: f64 = 1.; + let e: u64 = 0; + let r: u64 = transmute(vcled_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(1, 0, 1, 2, 3, 4, 5, 6); - let r: u16x8 = transmute(vabdl_high_u8(transmute(a), transmute(b))); + unsafe fn test_vcge_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 8, 9, 11, 12); - let b: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u32x4 = u32x4::new(2, 1, 1, 2); - let r: u32x4 = transmute(vabdl_high_u16(transmute(a), transmute(b))); + unsafe fn test_vcgeq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(10, 10, 10, 10); - let e: u64x2 = u64x2::new(7, 6); - let r: u64x2 = transmute(vabdl_high_u32(transmute(a), transmute(b))); + unsafe fn test_vcge_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(0); + let e: u64x1 = 
u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(1, 0, 1, 2, 3, 4, 5, 6); - let r: i16x8 = transmute(vabdl_high_s8(transmute(a), transmute(b))); + unsafe fn test_vcgeq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12); - let b: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i32x4 = i32x4::new(1, 0, 1, 2); - let r: i32x4 = transmute(vabdl_high_s16(transmute(a), transmute(b))); + unsafe fn test_vcge_f64() { + let a: f64 = 1.2; + let b: f64 = 0.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f64() { + let a: f64x2 = f64x2::new(1.2, 2.3); + let b: f64x2 = f64x2::new(0.1, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgez_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); + let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgezq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgez_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgezq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcgez_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgezq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcgez_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vcgezq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, -1); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vcgezq_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgez_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgezq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgez_f64() { + let a: f64 = -1.2; + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcgez_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgezq_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezd_s64() { + let a: i64 = -1; + let e: u64 = 0; + let r: u64 = transmute(vcgezd_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezs_f32() { + let a: f32 = -1.; + let e: u32 = 0; + let r: u32 = transmute(vcgezs_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgezd_f64() { + let a: f64 = -1.; + let e: u64 = 0; + let r: u64 = transmute(vcgezd_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u8x8 = u8x8::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgtz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); + let e: u8x16 = u8x16::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: u16x4 = u16x4::new(0, 0, 0, 0xFF_FF); + let r: u16x4 = transmute(vcgtz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u16x8 = u16x8::new(0, 0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcgtz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtz_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = 
transmute(vcgtz_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(10, 10, 10, 10); - let e: i64x2 = i64x2::new(7, 6); - let r: i64x2 = transmute(vabdl_high_s32(transmute(a), transmute(b))); + unsafe fn test_vcgtzq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, -1); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vcgtzq_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); + unsafe fn test_vcgtz_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcgtz_f32(transmute(a))); assert_eq!(r, e); + } - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtzq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u64() { - let a: u64x2 = u64x2::new(0, 0x01); - let b: u64x2 = u64x2::new(0, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); + unsafe fn test_vcgtz_f64() { + let a: f64 = -1.2; + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcgtz_f64(transmute(a))); assert_eq!(r, e); + } - let a: u64x2 = u64x2::new(0, 0); - let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vcgtzq_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); + unsafe fn test_vcgtzd_s64() { + let a: i64 = -1; + let e: u64 = 0; + let r: u64 = transmute(vcgtzd_s64(transmute(a))); assert_eq!(r, e); + } - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcgtzs_f32() { + let a: f32 = -1.; + let e: u32 = 0; + let r: u32 = transmute(vcgtzs_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); + unsafe fn test_vcgtzd_f64() { + let a: f64 = -1.; + let e: u64 = 0; + let r: u64 = transmute(vcgtzd_f64(transmute(a))); assert_eq!(r, e); + } - 
let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vclez_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vclez_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); + unsafe fn test_vclezq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vclezq_s8(transmute(a))); assert_eq!(r, e); + } - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vclez_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0); + let r: u16x4 = transmute(vclez_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); + unsafe fn test_vclezq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vclezq_s16(transmute(a))); assert_eq!(r, e); + } - let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vclez_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclez_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_f64() { - let a: f64 = 1.2; - let b: f64 = 1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b))); + unsafe fn test_vclezq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vclezq_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_f64() { - let a: f64x2 = f64x2::new(1.2, 3.4); - let b: f64x2 = f64x2::new(1.2, 3.4); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b))); + unsafe fn test_vclez_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: 
u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclez_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vceqz_s8(transmute(a))); + unsafe fn test_vclezq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, -1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vclezq_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_s8(transmute(a))); + unsafe fn test_vclez_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclez_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); - let r: u16x4 = transmute(vceqz_s16(transmute(a))); + unsafe fn test_vclezq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); + let r: u32x4 = transmute(vclezq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vceqzq_s16(transmute(a))); + unsafe fn test_vclez_f64() { + let a: f64 = -1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclez_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x00); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceqz_s32(transmute(a))); + unsafe fn test_vclezq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vclezq_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vceqzq_s32(transmute(a))); + unsafe fn test_vclezd_s64() { + let a: i64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vclezd_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vceqz_s64(transmute(a))); + unsafe fn test_vclezs_f32() { + let a: f32 = 2.; + let e: u32 = 0; + let r: u32 = transmute(vclezs_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_s64(transmute(a))); + unsafe fn test_vclezd_f64() { + let a: f64 = 2.; + let e: u64 = 0; + let r: u64 = transmute(vclezd_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe 
fn test_vceqz_p8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vceqz_p8(transmute(a))); + unsafe fn test_vcltz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vcltz_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_p8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_p8(transmute(a))); + unsafe fn test_vcltzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vcltzq_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vceqz_p64(transmute(a))); + unsafe fn test_vcltz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); + let r: u16x4 = transmute(vcltz_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_p64(transmute(a))); + unsafe fn test_vcltzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vcltzq_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vceqz_u8(transmute(a))); + unsafe fn test_vcltz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcltz_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_u8(transmute(a))); + unsafe fn test_vcltzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); + let r: u32x4 = transmute(vcltzq_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); - let r: u16x4 = transmute(vceqz_u16(transmute(a))); + unsafe fn test_vcltz_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcltz_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 
0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vceqzq_u16(transmute(a))); + unsafe fn test_vcltzq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, -1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltzq_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceqz_u32(transmute(a))); + unsafe fn test_vcltz_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vcltz_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vceqzq_u32(transmute(a))); + unsafe fn test_vcltzq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); + let r: u32x4 = transmute(vcltzq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u64() { - let a: u64x1 = u64x1::new(0); + unsafe fn test_vcltz_f64() { + let a: f64 = -1.2; let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceqz_u64(transmute(a))); + let r: u64x1 = transmute(vcltz_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u64() { - let a: u64x2 = u64x2::new(0, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_u64(transmute(a))); + unsafe fn test_vcltzq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vcltzq_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_f32() { - let a: f32x2 = f32x2::new(0.0, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceqz_f32(transmute(a))); + unsafe fn test_vcltzd_s64() { + let a: i64 = 2; + let e: u64 = 0; + let r: u64 = transmute(vcltzd_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_f32() { - let a: f32x4 = f32x4::new(0.0, 1.2, 3.4, 5.6); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); - let r: u32x4 = transmute(vceqzq_f32(transmute(a))); + unsafe fn test_vcltzs_f32() { + let a: f32 = 2.; + let e: u32 = 0; + let r: u32 = transmute(vcltzs_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqz_f64() { - let a: f64 = 0.0; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceqz_f64(transmute(a))); + unsafe fn test_vcltzd_f64() { + let a: f64 = 2.; + let e: u64 = 0; + let r: u64 = transmute(vcltzd_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_f64() { - let a: f64x2 = f64x2::new(0.0, 1.2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqzq_f64(transmute(a))); + unsafe fn test_vcagt_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(!0); + let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = 
u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vtst_s64(transmute(a), transmute(b))); + unsafe fn test_vcagtq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(!0, 0); + let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vtstq_s64(transmute(a), transmute(b))); + unsafe fn test_vcagts_f32() { + let a: f32 = -1.2; + let b: f32 = -1.1; + let e: u32 = !0; + let r: u32 = transmute(vcagts_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vtst_p64(transmute(a), transmute(b))); + unsafe fn test_vcagtd_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64 = !0; + let r: u64 = transmute(vcagtd_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vtstq_p64(transmute(a), transmute(b))); + unsafe fn test_vcage_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(!0); + let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vtst_u64(transmute(a), transmute(b))); + unsafe fn test_vcageq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(!0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u64() { - let a: u64x2 = u64x2::new(0, 0x00); - let b: u64x2 = u64x2::new(0, 0x00); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vtstq_u64(transmute(a), transmute(b))); + unsafe fn test_vcages_f32() { + let a: f32 = -1.2; + let b: f32 = -1.1; + let e: u32 = !0; + let r: u32 = transmute(vcages_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabs_f64() { - let a: f64 = -0.1; - let e: f64 = 0.1; - let r: f64 = transmute(vabs_f64(transmute(a))); + unsafe fn test_vcaged_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64 = !0; + let r: u64 = transmute(vcaged_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabsq_f64() { - let a: f64x2 = f64x2::new(-0.1, -2.2); - let e: f64x2 = f64x2::new(0.1, 2.2); - let r: f64x2 = transmute(vabsq_f64(transmute(a))); + unsafe fn test_vcalt_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(0); - let e: u64x1 = 
-        let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcaltq_f64() {
+        let a: f64x2 = f64x2::new(-1.2, 0.0);
+        let b: f64x2 = f64x2::new(-1.1, 0.0);
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_s64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(0, 1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcalts_f32() {
+        let a: f32 = -1.2;
+        let b: f32 = -1.1;
+        let e: u32 = 0;
+        let r: u32 = transmute(vcalts_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_u64() {
-        let a: u64x1 = u64x1::new(1);
-        let b: u64x1 = u64x1::new(0);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcaltd_f64() {
+        let a: f64 = -1.2;
+        let b: f64 = -1.1;
+        let e: u64 = 0;
+        let r: u64 = transmute(vcaltd_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x2 = u64x2::new(0, 1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcale_f64() {
+        let a: f64 = -1.2;
+        let b: f64 = -1.1;
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_f64() {
-        let a: f64 = 1.2;
-        let b: f64 = 0.1;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcaleq_f64() {
+        let a: f64x2 = f64x2::new(-1.2, 0.0);
+        let b: f64x2 = f64x2::new(-1.1, 0.0);
+        let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_f64() {
-        let a: f64x2 = f64x2::new(1.2, 2.3);
-        let b: f64x2 = f64x2::new(0.1, 1.2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcales_f32() {
+        let a: f32 = -1.2;
+        let b: f32 = -1.1;
+        let e: u32 = 0;
+        let r: u32 = transmute(vcales_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let b: i64x1 = i64x1::new(1);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcaled_f64() {
+        let a: f64 = -1.2;
+        let b: f64 = -1.1;
+        let e: u64 = 0;
+        let r: u64 = transmute(vcaled_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let b: i64x2 = i64x2::new(1, 2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vcopy_lane_s8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let b: u64x1 = u64x1::new(1);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x16 = transmute(vcopyq_laneq_s8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u64() {
-        let a: u64x2 = u64x2::new(0, 1);
-        let b: u64x2 = u64x2::new(1, 2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+        let r: i16x4 = transmute(vcopy_lane_s16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_f64() {
-        let a: f64 = 0.1;
-        let b: f64 = 1.2;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vcopyq_laneq_s16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_f64() {
-        let a: f64x2 = f64x2::new(0.1, 1.2);
-        let b: f64x2 = f64x2::new(1.2, 2.3);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
+        let r: i32x2 = transmute(vcopy_lane_s32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let b: i64x1 = i64x1::new(1);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
+        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
+        let r: i32x4 = transmute(vcopyq_laneq_s32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let b: i64x2 = i64x2::new(1, 2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
+        let r: i64x2 = transmute(vcopyq_laneq_s64::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let b: u64x1 = u64x1::new(1);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+        let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8x8 = transmute(vcopy_lane_u8::<0, 1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcopyq_laneq_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8x16 = transmute(vcopyq_laneq_u8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u64() {
-        let a: u64x2 = u64x2::new(0, 1);
-        let b: u64x2 = u64x2::new(1, 2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
+        let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
+        let r: u16x4 = transmute(vcopy_lane_u16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_f64() {
-        let a: f64 = 0.1;
-        let b: f64 = 1.2;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+        let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: u16x8 = transmute(vcopyq_laneq_u16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_f64() {
-        let a: f64x2 = f64x2::new(0.1, 1.2);
-        let b: f64x2 = f64x2::new(1.2, 2.3);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
+        let r: u32x2 = transmute(vcopy_lane_u32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x1 = i64x1::new(0);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
+        let r: u32x4 = transmute(vcopyq_laneq_u32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(0, 1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 2);
+        let r: u64x2 = transmute(vcopyq_laneq_u64::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u64() {
-        let a: u64x1 = u64x1::new(1);
-        let b: u64x1 = u64x1::new(0);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_p8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vcopy_lane_p8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x2 = u64x2::new(0, 1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_p8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x16 = transmute(vcopyq_laneq_p8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_f64() {
-        let a: f64 = 1.2;
-        let b: f64 = 0.1;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopy_lane_p16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+        let r: i16x4 = transmute(vcopy_lane_p16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_f64() {
-        let a: f64x2 = f64x2::new(1.2, 2.3);
-        let b: f64x2 = f64x2::new(0.1, 1.2);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcopyq_laneq_p16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vcopyq_laneq_p16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcgez_s8(transmute(a)));
+    unsafe fn test_vcopyq_laneq_p64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
+        let r: i64x2 = transmute(vcopyq_laneq_p64::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
-        let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcgezq_s8(transmute(a)));
+    unsafe fn test_vcopy_lane_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x2 = f32x2::new(0., 0.5);
+        let e: f32x2 = f32x2::new(0.5, 2.);
+        let r: f32x2 = transmute(vcopy_lane_f32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vcgez_s16(transmute(a)));
+    unsafe fn test_vcopyq_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
+        let e: f32x4 = f32x4::new(0.5, 2., 3., 4.);
+        let r: f32x4 = transmute(vcopyq_laneq_f32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcgezq_s16(transmute(a)));
+    unsafe fn test_vcopyq_laneq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64x2 = f64x2::new(0., 0.5);
+        let e: f64x2 = f64x2::new(0.5, 2.);
+        let r: f64x2 = transmute(vcopyq_laneq_f64::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vcgez_s32(transmute(a)));
+    unsafe fn test_vcopy_laneq_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vcopy_laneq_s8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
-        let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgezq_s32(transmute(a)));
+    unsafe fn test_vcopy_laneq_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+        let r: i16x4 = transmute(vcopy_laneq_s16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcgez_s64(transmute(a)));
+    unsafe fn test_vcopy_laneq_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
+        let r: i32x2 = transmute(vcopy_laneq_s32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, -1);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vcgezq_s64(transmute(a)));
+    unsafe fn test_vcopy_laneq_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8x8 = transmute(vcopy_laneq_u8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcgez_f32(transmute(a)));
+    unsafe fn test_vcopy_laneq_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+        let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
+        let r: u16x4 = transmute(vcopy_laneq_u16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgezq_f32(transmute(a)));
+    unsafe fn test_vcopy_laneq_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
+        let r: u32x2 = transmute(vcopy_laneq_u32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgez_f64() {
-        let a: f64 = -1.2;
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcgez_f64(transmute(a)));
+    unsafe fn test_vcopy_laneq_p8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vcopy_laneq_p8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgezq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcgezq_f64(transmute(a)));
+    unsafe fn test_vcopy_laneq_p16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+        let r: i16x4 = transmute(vcopy_laneq_p16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u8x8 = u8x8::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcgtz_s8(transmute(a)));
+    unsafe fn test_vcopy_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
+        let e: f32x2 = f32x2::new(0.5, 2.);
+        let r: f32x2 = transmute(vcopy_laneq_f32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
-        let e: u8x16 = u8x16::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcgtzq_s8(transmute(a)));
+    unsafe fn test_vcopyq_lane_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x16 = transmute(vcopyq_lane_s8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: u16x4 = u16x4::new(0, 0, 0, 0xFF_FF);
-        let r: u16x4 = transmute(vcgtz_s16(transmute(a)));
+    unsafe fn test_vcopyq_lane_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vcopyq_lane_s16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u16x8 = u16x8::new(0, 0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcgtzq_s16(transmute(a)));
+    unsafe fn test_vcopyq_lane_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
+        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
+        let r: i32x4 = transmute(vcopyq_lane_s32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vcgtz_s32(transmute(a)));
+    unsafe fn test_vcopyq_lane_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+        let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8x16 = transmute(vcopyq_lane_u8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
-        let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgtzq_s32(transmute(a)));
+    unsafe fn test_vcopyq_lane_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
+        let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: u16x8 = transmute(vcopyq_lane_u16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcgtz_s64(transmute(a)));
+    unsafe fn test_vcopyq_lane_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
+        let r: u32x4 = transmute(vcopyq_lane_u32::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, -1);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vcgtzq_s64(transmute(a)));
+    unsafe fn test_vcopyq_lane_p8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x16 = transmute(vcopyq_lane_p8::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vcgtz_f32(transmute(a)));
+    unsafe fn test_vcopyq_lane_p16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vcopyq_lane_p16::<0, 1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgtzq_f32(transmute(a)));
+    unsafe fn test_vcopyq_lane_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x2 = transmute(vcopyq_lane_s64::<1, 0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtz_f64() {
-        let a: f64 = -1.2;
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcgtz_f64(transmute(a)));
+    unsafe fn test_vcopyq_lane_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u64x2 = u64x2::new(1, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vcopyq_lane_u64::<1, 0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtzq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vcgtzq_f64(transmute(a)));
+    unsafe fn test_vcopyq_lane_p64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x2 = transmute(vcopyq_lane_p64::<1, 0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0);
-        let r: u8x8 = transmute(vclez_s8(transmute(a)));
+    unsafe fn test_vcopyq_lane_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x2 = f32x2::new(0.5, 0.);
+        let e: f32x4 = f32x4::new(1., 0.5, 3., 4.);
+        let r: f32x4 = transmute(vcopyq_lane_f32::<1, 0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: u8x16 = transmute(vclezq_s8(transmute(a)));
+    unsafe fn test_vcopyq_lane_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64 = 0.5;
+        let e: f64x2 = f64x2::new(1., 0.5);
+        let r: f64x2 = transmute(vcopyq_lane_f64::<1, 0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0);
-        let r: u16x4 = transmute(vclez_s16(transmute(a)));
+    unsafe fn test_vcreate_f64() {
+        let a: u64 = 0;
+        let e: f64 = 0.;
+        let r: f64 = transmute(vcreate_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0);
-        let r: u16x8 = transmute(vclezq_s16(transmute(a)));
+    unsafe fn test_vcvt_f64_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let e: f64 = 1.;
+        let r: f64 = transmute(vcvt_f64_s64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vclez_s32(transmute(a)));
+    unsafe fn test_vcvtq_f64_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let e: f64x2 = f64x2::new(1., 2.);
+        let r: f64x2 = transmute(vcvtq_f64_s64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
-        let r: u32x4 = transmute(vclezq_s32(transmute(a)));
+    unsafe fn test_vcvt_f64_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let e: f64 = 1.;
+        let r: f64 = transmute(vcvt_f64_u64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vclez_s64(transmute(a)));
+    unsafe fn test_vcvtq_f64_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let e: f64x2 = f64x2::new(1., 2.);
+        let r: f64x2 = transmute(vcvtq_f64_u64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, -1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vclezq_s64(transmute(a)));
+    unsafe fn test_vcvt_f64_f32() {
+        let a: f32x2 = f32x2::new(-1.2, 1.2);
+        let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64);
+        let r: f64x2 = transmute(vcvt_f64_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vclez_f32(transmute(a)));
+    unsafe fn test_vcvt_high_f64_f32() {
+        let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4);
+        let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64);
+        let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
-        let r: u32x4 = transmute(vclezq_f32(transmute(a)));
+    unsafe fn test_vcvt_f32_f64() {
+        let a: f64x2 = f64x2::new(-1.2, 1.2);
+        let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32);
+        let r: f32x2 = transmute(vcvt_f32_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclez_f64() {
-        let a: f64 = -1.2;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vclez_f64(transmute(a)));
+    unsafe fn test_vcvt_high_f32_f64() {
+        let a: f32x2 = f32x2::new(-1.2, 1.2);
+        let b: f64x2 = f64x2::new(-2.3, 3.4);
+        let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32);
+        let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vclezq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vclezq_f64(transmute(a)));
+    unsafe fn test_vcvtx_f32_f64() {
+        let a: f64x2 = f64x2::new(-1.0, 2.0);
+        let e: f32x2 = f32x2::new(-1.0, 2.0);
+        let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0);
-        let r: u8x8 = transmute(vcltz_s8(transmute(a)));
+    unsafe fn test_vcvtxd_f32_f64() {
+        let a: f64 = -1.0;
+        let e: f32 = -1.0;
+        let r: f32 = transmute(vcvtxd_f32_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: u8x16 = transmute(vcltzq_s8(transmute(a)));
+    unsafe fn test_vcvtx_high_f32_f64() {
+        let a: f32x2 = f32x2::new(-1.0, 2.0);
+        let b: f64x2 = f64x2::new(-3.0, 4.0);
+        let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0);
+        let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0);
-        let r: u16x4 = transmute(vcltz_s16(transmute(a)));
+    unsafe fn test_vcvt_n_f64_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let e: f64 = 0.25;
+        let r: f64 = transmute(vcvt_n_f64_s64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0);
-        let r: u16x8 = transmute(vcltzq_s16(transmute(a)));
+    unsafe fn test_vcvtq_n_f64_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let e: f64x2 = f64x2::new(0.25, 0.5);
+        let r: f64x2 = transmute(vcvtq_n_f64_s64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcltz_s32(transmute(a)));
+    unsafe fn test_vcvts_n_f32_s32() {
+        let a: i32 = 1;
+        let e: f32 = 0.25;
+        let r: f32 = transmute(vcvts_n_f32_s32::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
-        let r: u32x4 = transmute(vcltzq_s32(transmute(a)));
+    unsafe fn test_vcvtd_n_f64_s64() {
+        let a: i64 = 1;
+        let e: f64 = 0.25;
+        let r: f64 = transmute(vcvtd_n_f64_s64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcltz_s64(transmute(a)));
+    unsafe fn test_vcvt_n_f64_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let e: f64 = 0.25;
+        let r: f64 = transmute(vcvt_n_f64_u64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, -1);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcltzq_s64(transmute(a)));
+    unsafe fn test_vcvtq_n_f64_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let e: f64x2 = f64x2::new(0.25, 0.5);
+        let r: f64x2 = transmute(vcvtq_n_f64_u64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
-        let r: u32x2 = transmute(vcltz_f32(transmute(a)));
+    unsafe fn test_vcvts_n_f32_u32() {
+        let a: u32 = 1;
+        let e: f32 = 0.25;
+        let r: f32 = transmute(vcvts_n_f32_u32::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0);
-        let r: u32x4 = transmute(vcltzq_f32(transmute(a)));
+    unsafe fn test_vcvtd_n_f64_u64() {
+        let a: u64 = 1;
+        let e: f64 = 0.25;
+        let r: f64 = transmute(vcvtd_n_f64_u64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltz_f64() {
-        let a: f64 = -1.2;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcltz_f64(transmute(a)));
+    unsafe fn test_vcvt_n_s64_f64() {
+        let a: f64 = 0.25;
+        let e: i64x1 = i64x1::new(1);
+        let r: i64x1 = transmute(vcvt_n_s64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltzq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
-        let r: u64x2 = transmute(vcltzq_f64(transmute(a)));
+    unsafe fn test_vcvtq_n_s64_f64() {
+        let a: f64x2 = f64x2::new(0.25, 0.5);
+        let e: i64x2 = i64x2::new(1, 2);
+        let r: i64x2 = transmute(vcvtq_n_s64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcagt_f64() {
-        let a: f64 = -1.2;
-        let b: f64 = -1.1;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b)));
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcvts_n_s32_f32() {
+        let a: f32 = 0.25;
+        let e: i32 = 1;
+        let r: i32 = transmute(vcvts_n_s32_f32::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcagtq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let b: f64x2 = f64x2::new(-1.1, 0.0);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
-        let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_n_s64_f64() {
+        let a: f64 = 0.25;
+        let e: i64 = 1;
+        let r: i64 = transmute(vcvtd_n_s64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcage_f64() {
-        let a: f64 = -1.2;
-        let b: f64 = -1.1;
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvt_n_u64_f64() {
+        let a: f64 = 0.25;
+        let e: u64x1 = u64x1::new(1);
+        let r: u64x1 = transmute(vcvt_n_u64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcageq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let b: f64x2 = f64x2::new(-1.1, 0.0);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvtq_n_u64_f64() {
+        let a: f64x2 = f64x2::new(0.25, 0.5);
+        let e: u64x2 = u64x2::new(1, 2);
+        let r: u64x2 = transmute(vcvtq_n_u64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcalt_f64() {
-        let a: f64 = -1.2;
-        let b: f64 = -1.1;
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvts_n_u32_f32() {
+        let a: f32 = 0.25;
+        let e: u32 = 1;
+        let r: u32 = transmute(vcvts_n_u32_f32::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcaltq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let b: f64x2 = f64x2::new(-1.1, 0.0);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_n_u64_f64() {
+        let a: f64 = 0.25;
+        let e: u64 = 1;
+        let r: u64 = transmute(vcvtd_n_u64_f64::<2>(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcale_f64() {
-        let a: f64 = -1.2;
-        let b: f64 = -1.1;
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvts_f32_s32() {
+        let a: i32 = 1;
+        let e: f32 = 1.;
+        let r: f32 = transmute(vcvts_f32_s32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcaleq_f64() {
-        let a: f64x2 = f64x2::new(-1.2, 0.0);
-        let b: f64x2 = f64x2::new(-1.1, 0.0);
-        let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_f64_s64() {
+        let a: i64 = 1;
+        let e: f64 = 1.;
+        let r: f64 = transmute(vcvtd_f64_s64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
-        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vcopy_lane_s8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvts_f32_u32() {
+        let a: u32 = 1;
+        let e: f32 = 1.;
+        let r: f32 = transmute(vcvts_f32_u32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vcopyq_laneq_s8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_f64_u64() {
+        let a: u64 = 1;
+        let e: f64 = 1.;
+        let r: f64 = transmute(vcvtd_f64_u64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
-        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
-        let r: i16x4 = transmute(vcopy_lane_s16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvts_s32_f32() {
+        let a: f32 = 1.;
+        let e: i32 = 1;
+        let r: i32 = transmute(vcvts_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
-        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = transmute(vcopyq_laneq_s16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_s64_f64() {
+        let a: f64 = 1.;
+        let e: i64 = 1;
+        let r: i64 = transmute(vcvtd_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
-        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
-        let r: i32x2 = transmute(vcopy_lane_s32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvts_u32_f32() {
+        let a: f32 = 1.;
+        let e: u32 = 1;
+        let r: u32 = transmute(vcvts_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
-        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
-        let r: i32x4 = transmute(vcopyq_laneq_s32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtd_u64_f64() {
+        let a: f64 = 1.;
+        let e: u64 = 1;
+        let r: u64 = transmute(vcvtd_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_s64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
-        let r: i64x2 = transmute(vcopyq_laneq_s64::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvt_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64x1 = i64x1::new(-1);
+        let r: i64x1 = transmute(vcvt_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
-        let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vcopy_lane_u8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtq_s64_f64() {
+        let a: f64x2 = f64x2::new(-1.1, 2.1);
+        let e: i64x2 = i64x2::new(-1, 2);
+        let r: i64x2 = transmute(vcvtq_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x16 = transmute(vcopyq_laneq_u8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvt_u64_f64() {
+        let a: f64 = 1.1;
+        let e: u64x1 = u64x1::new(1);
+        let r: u64x1 = transmute(vcvt_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
-        let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
-        let r: u16x4 = transmute(vcopy_lane_u16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtq_u64_f64() {
+        let a: f64x2 = f64x2::new(1.1, 2.1);
+        let e: u64x2 = u64x2::new(1, 2);
+        let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
-        let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16x8 = transmute(vcopyq_laneq_u16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvta_s32_f32() {
+        let a: f32x2 = f32x2::new(-1.1, 2.1);
+        let e: i32x2 = i32x2::new(-1, 2);
+        let r: i32x2 = transmute(vcvta_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
-        let r: u32x2 = transmute(vcopy_lane_u32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtaq_s32_f32() {
+        let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+        let e: i32x4 = i32x4::new(-1, 2, -3, 4);
+        let r: i32x4 = transmute(vcvtaq_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
-        let r: u32x4 = transmute(vcopyq_laneq_u32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvta_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64x1 = i64x1::new(-1);
+        let r: i64x1 = transmute(vcvta_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 2);
-        let r: u64x2 = transmute(vcopyq_laneq_u64::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtaq_s64_f64() {
+        let a: f64x2 = f64x2::new(-1.1, 2.1);
+        let e: i64x2 = i64x2::new(-1, 2);
+        let r: i64x2 = transmute(vcvtaq_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_p8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
-        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vcopy_lane_p8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtas_s32_f32() {
+        let a: f32 = 2.9;
+        let e: i32 = 3;
+        let r: i32 = transmute(vcvtas_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_p8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vcopyq_laneq_p8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtad_s64_f64() {
+        let a: f64 = 2.9;
+        let e: i64 = 3;
+        let r: i64 = transmute(vcvtad_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_p16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
-        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
-        let r: i16x4 = transmute(vcopy_lane_p16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtas_u32_f32() {
+        let a: f32 = 2.9;
+        let e: u32 = 3;
+        let r: u32 = transmute(vcvtas_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_p16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
-        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = transmute(vcopyq_laneq_p16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtad_u64_f64() {
+        let a: f64 = 2.9;
+        let e: u64 = 3;
+        let r: u64 = transmute(vcvtad_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_p64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
-        let r: i64x2 = transmute(vcopyq_laneq_p64::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtn_s32_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 2.1);
+        let e: i32x2 = i32x2::new(-2, 2);
+        let r: i32x2 = transmute(vcvtn_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x2 = f32x2::new(0., 0.5);
-        let e: f32x2 = f32x2::new(0.5, 2.);
-        let r: f32x2 = transmute(vcopy_lane_f32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtnq_s32_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 2.1, -2.9, 3.9);
+        let e: i32x4 = i32x4::new(-2, 2, -3, 4);
+        let r: i32x4 = transmute(vcvtnq_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
-        let e: f32x4 = f32x4::new(0.5, 2., 3., 4.);
-        let r: f32x4 = transmute(vcopyq_laneq_f32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtn_s64_f64() {
+        let a: f64 = -1.5;
+        let e: i64x1 = i64x1::new(-2);
+        let r: i64x1 = transmute(vcvtn_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_laneq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64x2 = f64x2::new(0., 0.5);
-        let e: f64x2 = f64x2::new(0.5, 2.);
-        let r: f64x2 = transmute(vcopyq_laneq_f64::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtnq_s64_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 2.1);
+        let e: i64x2 = i64x2::new(-2, 2);
+        let r: i64x2 = transmute(vcvtnq_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vcopy_laneq_s8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtns_s32_f32() {
+        let a: f32 = -1.5;
+        let e: i32 = -2;
+        let r: i32 = transmute(vcvtns_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
-        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
-        let r: i16x4 = transmute(vcopy_laneq_s16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtnd_s64_f64() {
+        let a: f64 = -1.5;
+        let e: i64 = -2;
+        let r: i64 = transmute(vcvtnd_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
-        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
-        let r: i32x2 = transmute(vcopy_laneq_s32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtm_s32_f32() {
+        let a: f32x2 = f32x2::new(-1.1, 2.1);
+        let e: i32x2 = i32x2::new(-2, 2);
+        let r: i32x2 = transmute(vcvtm_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vcopy_laneq_u8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtmq_s32_f32() {
+        let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+        let e: i32x4 = i32x4::new(-2, 2, -3, 3);
+        let r: i32x4 = transmute(vcvtmq_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
-        let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
-        let r: u16x4 = transmute(vcopy_laneq_u16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtm_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64x1 = i64x1::new(-2);
+        let r: i64x1 = transmute(vcvtm_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
-        let r: u32x2 = transmute(vcopy_laneq_u32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtmq_s64_f64() {
+        let a: f64x2 = f64x2::new(-1.1, 2.1);
+        let e: i64x2 = i64x2::new(-2, 2);
+        let r: i64x2 = transmute(vcvtmq_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_p8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vcopy_laneq_p8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtms_s32_f32() {
+        let a: f32 = -1.1;
+        let e: i32 = -2;
+        let r: i32 = transmute(vcvtms_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_p16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
-        let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
-        let r: i16x4 = transmute(vcopy_laneq_p16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtmd_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64 = -2;
+        let r: i64 = transmute(vcvtmd_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
-        let e: f32x2 = f32x2::new(0.5, 2.);
-        let r: f32x2 = transmute(vcopy_laneq_f32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtp_s32_f32() {
+        let a: f32x2 = f32x2::new(-1.1, 2.1);
+        let e: i32x2 = i32x2::new(-1, 3);
+        let r: i32x2 = transmute(vcvtp_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
-        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vcopyq_lane_s8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtpq_s32_f32() {
+        let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+        let e: i32x4 = i32x4::new(-1, 3, -2, 4);
+        let r: i32x4 = transmute(vcvtpq_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
-        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = transmute(vcopyq_lane_s16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtp_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64x1 = i64x1::new(-1);
+        let r: i64x1 = transmute(vcvtp_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
-        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
-        let r: i32x4 = transmute(vcopyq_lane_s32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtpq_s64_f64() {
+        let a: f64x2 = f64x2::new(-1.1, 2.1);
+        let e: i64x2 = i64x2::new(-1, 3);
+        let r: i64x2 = transmute(vcvtpq_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
-        let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x16 = transmute(vcopyq_lane_u8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtps_s32_f32() {
+        let a: f32 = -1.1;
+        let e: i32 = -1;
+        let r: i32 = transmute(vcvtps_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
-        let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16x8 = transmute(vcopyq_lane_u16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtpd_s64_f64() {
+        let a: f64 = -1.1;
+        let e: i64 = -1;
+        let r: i64 = transmute(vcvtpd_s64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
-        let r: u32x4 = transmute(vcopyq_lane_u32::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvta_u32_f32() {
+        let a: f32x2 = f32x2::new(1.1, 2.1);
+        let e: u32x2 = u32x2::new(1, 2);
+        let r: u32x2 = transmute(vcvta_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_p8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
-        let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vcopyq_lane_p8::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtaq_u32_f32() {
+        let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
+        let e: u32x4 = u32x4::new(1, 2, 3, 4);
+        let r: u32x4 = transmute(vcvtaq_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_p16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
-        let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = transmute(vcopyq_lane_p16::<0, 1>(transmute(a), transmute(b)));
+    unsafe fn test_vcvta_u64_f64() {
+        let a: f64 = 1.1;
+        let e: u64x1 = u64x1::new(1);
+        let r: u64x1 = transmute(vcvta_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_s64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x2 = transmute(vcopyq_lane_s64::<1, 0>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtaq_u64_f64() {
+        let a: f64x2 = f64x2::new(1.1, 2.1);
+        let e: u64x2 = u64x2::new(1, 2);
+        let r: u64x2 = transmute(vcvtaq_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let e: u64x2 = u64x2::new(1, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x2 = transmute(vcopyq_lane_u64::<1, 0>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtn_u32_f32() {
+        let a: f32x2 = f32x2::new(1.5, 2.1);
+        let e: u32x2 = u32x2::new(2, 2);
+        let r: u32x2 = transmute(vcvtn_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_p64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x2 = transmute(vcopyq_lane_p64::<1, 0>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtnq_u32_f32() {
+        let a: f32x4 = f32x4::new(1.5, 2.1, 2.9, 3.9);
+        let e: u32x4 = u32x4::new(2, 2, 3, 4);
+        let r: u32x4 = transmute(vcvtnq_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x2 = f32x2::new(0.5, 0.);
-        let e: f32x4 = f32x4::new(1., 0.5, 3., 4.);
-        let r: f32x4 = transmute(vcopyq_lane_f32::<1, 0>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtn_u64_f64() {
+        let a: f64 = 1.5;
+        let e: u64x1 = u64x1::new(2);
+        let r: u64x1 = transmute(vcvtn_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopyq_lane_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64 = 0.5;
-        let e: f64x2 = f64x2::new(1., 0.5);
-        let r: f64x2 = transmute(vcopyq_lane_f64::<1, 0>(transmute(a), transmute(b)));
+    unsafe fn test_vcvtnq_u64_f64() {
+        let a: f64x2 = f64x2::new(1.5, 2.1);
+        let e: u64x2 = u64x2::new(2, 2);
+        let r: u64x2 = transmute(vcvtnq_u64_f64(transmute(a)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vcreate_f64() {
-        let a: u64 = 0;
-        let e: f64 = 0.;
-        let r: f64 = transmute(vcreate_f64(transmute(a)));
let r: f64 = transmute(vcreate_f64(transmute(a))); + unsafe fn test_vcvtns_u32_f32() { + let a: f32 = 1.5; + let e: u32 = 2; + let r: u32 = transmute(vcvtns_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_s64() { - let a: i64x1 = i64x1::new(1); - let e: f64 = 1.; - let r: f64 = transmute(vcvt_f64_s64(transmute(a))); + unsafe fn test_vcvtnd_u64_f64() { + let a: f64 = 1.5; + let e: u64 = 2; + let r: u64 = transmute(vcvtnd_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f64_s64() { - let a: i64x2 = i64x2::new(1, 2); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vcvtq_f64_s64(transmute(a))); + unsafe fn test_vcvtm_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvtm_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_u64() { - let a: u64x1 = u64x1::new(1); - let e: f64 = 1.; - let r: f64 = transmute(vcvt_f64_u64(transmute(a))); + unsafe fn test_vcvtmq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 2, 3); + let r: u32x4 = transmute(vcvtmq_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f64_u64() { - let a: u64x2 = u64x2::new(1, 2); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vcvtq_f64_u64(transmute(a))); + unsafe fn test_vcvtm_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcvtm_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_f32() { - let a: f32x2 = f32x2::new(-1.2, 1.2); - let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64); - let r: f64x2 = transmute(vcvt_f64_f32(transmute(a))); + unsafe fn test_vcvtmq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vcvtmq_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_high_f64_f32() { - let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4); - let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64); - let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a))); + unsafe fn test_vcvtms_u32_f32() { + let a: f32 = 1.1; + let e: u32 = 1; + let r: u32 = transmute(vcvtms_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_f64() { - let a: f64x2 = f64x2::new(-1.2, 1.2); - let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32); - let r: f32x2 = transmute(vcvt_f32_f64(transmute(a))); + unsafe fn test_vcvtmd_u64_f64() { + let a: f64 = 1.1; + let e: u64 = 1; + let r: u64 = transmute(vcvtmd_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_high_f32_f64() { - let a: f32x2 = f32x2::new(-1.2, 1.2); - let b: f64x2 = f64x2::new(-2.3, 3.4); - let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32); - let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b))); + unsafe fn test_vcvtp_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vcvtp_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtx_f32_f64() { - let a: f64x2 = f64x2::new(-1.0, 2.0); - let e: f32x2 = f32x2::new(-1.0, 2.0); - let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a))); + unsafe fn test_vcvtpq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 
2.9, 3.9); + let e: u32x4 = u32x4::new(2, 3, 3, 4); + let r: u32x4 = transmute(vcvtpq_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtx_high_f32_f64() { - let a: f32x2 = f32x2::new(-1.0, 2.0); - let b: f64x2 = f64x2::new(-3.0, 4.0); - let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0); - let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b))); + unsafe fn test_vcvtp_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vcvtp_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f64_s64() { - let a: i64x1 = i64x1::new(1); - let e: f64 = 0.25; - let r: f64 = transmute(vcvt_n_f64_s64::<2>(transmute(a))); + unsafe fn test_vcvtpq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vcvtpq_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f64_s64() { - let a: i64x2 = i64x2::new(1, 2); - let e: f64x2 = f64x2::new(0.25, 0.5); - let r: f64x2 = transmute(vcvtq_n_f64_s64::<2>(transmute(a))); + unsafe fn test_vcvtps_u32_f32() { + let a: f32 = 1.1; + let e: u32 = 2; + let r: u32 = transmute(vcvtps_u32_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_f32_s32() { - let a: i32 = 1; - let e: f32 = 0.25; - let r: f32 = transmute(vcvts_n_f32_s32::<2>(transmute(a))); + unsafe fn test_vcvtpd_u64_f64() { + let a: f64 = 1.1; + let e: u64 = 2; + let r: u64 = transmute(vcvtpd_u64_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_f64_s64() { - let a: i64 = 1; - let e: f64 = 0.25; - let r: f64 = transmute(vcvtd_n_f64_s64::<2>(transmute(a))); + unsafe fn test_vdupq_laneq_p64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_laneq_p64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f64_u64() { - let a: u64x1 = u64x1::new(1); - let e: f64 = 0.25; - let r: f64 = transmute(vcvt_n_f64_u64::<2>(transmute(a))); + unsafe fn test_vdupq_lane_p64() { + let a: i64x1 = i64x1::new(1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_lane_p64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f64_u64() { - let a: u64x2 = u64x2::new(1, 2); - let e: f64x2 = f64x2::new(0.25, 0.5); - let r: f64x2 = transmute(vcvtq_n_f64_u64::<2>(transmute(a))); + unsafe fn test_vdupq_laneq_f64() { + let a: f64x2 = f64x2::new(1., 1.); + let e: f64x2 = f64x2::new(1., 1.); + let r: f64x2 = transmute(vdupq_laneq_f64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_f32_u32() { - let a: u32 = 1; - let e: f32 = 0.25; - let r: f32 = transmute(vcvts_n_f32_u32::<2>(transmute(a))); + unsafe fn test_vdupq_lane_f64() { + let a: f64 = 1.; + let e: f64x2 = f64x2::new(1., 1.); + let r: f64x2 = transmute(vdupq_lane_f64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_f64_u64() { - let a: u64 = 1; - let e: f64 = 0.25; - let r: f64 = transmute(vcvtd_n_f64_u64::<2>(transmute(a))); + unsafe fn test_vdup_lane_p64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vdup_lane_p64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_s64_f64() { - let a: f64 = 0.25; - let e: i64x1 = i64x1::new(1); - 
let r: i64x1 = transmute(vcvt_n_s64_f64::<2>(transmute(a))); + unsafe fn test_vdup_lane_f64() { + let a: f64 = 0.; + let e: f64 = 0.; + let r: f64 = transmute(vdup_lane_f64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_s64_f64() { - let a: f64x2 = f64x2::new(0.25, 0.5); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vcvtq_n_s64_f64::<2>(transmute(a))); + unsafe fn test_vdup_laneq_p64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vdup_laneq_p64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_s32_f32() { - let a: f32 = 0.25; - let e: i32 = 1; - let r: i32 = transmute(vcvts_n_s32_f32::<2>(transmute(a))); + unsafe fn test_vdup_laneq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let e: f64 = 1.; + let r: f64 = transmute(vdup_laneq_f64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_s64_f64() { - let a: f64 = 0.25; - let e: i64 = 1; - let r: i64 = transmute(vcvtd_n_s64_f64::<2>(transmute(a))); + unsafe fn test_vdupb_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8 = 1; + let r: i8 = transmute(vdupb_lane_s8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_u64_f64() { - let a: f64 = 0.25; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvt_n_u64_f64::<2>(transmute(a))); + unsafe fn test_vdupb_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8 = 1; + let r: i8 = transmute(vdupb_laneq_s8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_u64_f64() { - let a: f64x2 = f64x2::new(0.25, 0.5); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtq_n_u64_f64::<2>(transmute(a))); + unsafe fn test_vduph_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16 = 1; + let r: i16 = transmute(vduph_lane_s16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_u32_f32() { - let a: f32 = 0.25; - let e: u32 = 1; - let r: u32 = transmute(vcvts_n_u32_f32::<2>(transmute(a))); + unsafe fn test_vduph_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16 = 1; + let r: i16 = transmute(vduph_laneq_s16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_u64_f64() { - let a: f64 = 0.25; - let e: u64 = 1; - let r: u64 = transmute(vcvtd_n_u64_f64::<2>(transmute(a))); + unsafe fn test_vdups_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32 = 1; + let r: i32 = transmute(vdups_lane_s32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_f32_s32() { - let a: i32 = 1; - let e: f32 = 1.; - let r: f32 = transmute(vcvts_f32_s32(transmute(a))); + unsafe fn test_vdups_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32 = 1; + let r: i32 = transmute(vdups_laneq_s32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_f64_s64() { - let a: i64 = 1; - let e: f64 = 1.; - let r: f64 = transmute(vcvtd_f64_s64(transmute(a))); + unsafe fn test_vdupd_lane_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64 = 1; + let r: i64 = transmute(vdupd_lane_s64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_f32_u32() { - let a: u32 = 1; - let e: f32 = 1.; - let r: f32 = 
transmute(vcvts_f32_u32(transmute(a))); + unsafe fn test_vdupd_laneq_s64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64 = 1; + let r: i64 = transmute(vdupd_laneq_s64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_f64_u64() { - let a: u64 = 1; - let e: f64 = 1.; - let r: f64 = transmute(vcvtd_f64_u64(transmute(a))); + unsafe fn test_vdupb_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8 = 1; + let r: u8 = transmute(vdupb_lane_u8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_s32_f32() { - let a: f32 = 1.; - let e: i32 = 1; - let r: i32 = transmute(vcvts_s32_f32(transmute(a))); + unsafe fn test_vdupb_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8 = 1; + let r: u8 = transmute(vdupb_laneq_u8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_s64_f64() { - let a: f64 = 1.; - let e: i64 = 1; - let r: i64 = transmute(vcvtd_s64_f64(transmute(a))); + unsafe fn test_vduph_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16 = 1; + let r: u16 = transmute(vduph_lane_u16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvts_u32_f32() { - let a: f32 = 1.; - let e: u32 = 1; - let r: u32 = transmute(vcvts_u32_f32(transmute(a))); + unsafe fn test_vduph_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16 = 1; + let r: u16 = transmute(vduph_laneq_u16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_u64_f64() { - let a: f64 = 1.; - let e: u64 = 1; - let r: u64 = transmute(vcvtd_u64_f64(transmute(a))); + unsafe fn test_vdups_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32 = 1; + let r: u32 = transmute(vdups_lane_u32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvt_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvt_s64_f64(transmute(a))); + unsafe fn test_vdups_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32 = 1; + let r: u32 = transmute(vdups_laneq_u32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 2); - let r: i64x2 = transmute(vcvtq_s64_f64(transmute(a))); + unsafe fn test_vdupd_lane_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64 = 1; + let r: u64 = transmute(vdupd_lane_u64::<0>(transmute(a))); assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvt_u64_f64(transmute(a))); + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_laneq_u64() { + let a: u64x2 = u64x2::new(1, 1); + let e: u64 = 1; + let r: u64 = transmute(vdupd_laneq_u64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a))); + unsafe fn test_vdupb_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: p8 = 1; + let r: p8 = transmute(vdupb_lane_p8::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvta_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 2); - let r: 
i32x2 = transmute(vcvta_s32_f32(transmute(a))); + unsafe fn test_vdupb_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: p8 = 1; + let r: p8 = transmute(vdupb_laneq_p8::<8>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 2, -3, 4); - let r: i32x4 = transmute(vcvtaq_s32_f32(transmute(a))); + unsafe fn test_vduph_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: p16 = 1; + let r: p16 = transmute(vduph_lane_p16::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvta_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvta_s64_f64(transmute(a))); + unsafe fn test_vduph_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: p16 = 1; + let r: p16 = transmute(vduph_laneq_p16::<4>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 2); - let r: i64x2 = transmute(vcvtaq_s64_f64(transmute(a))); + unsafe fn test_vdups_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32 = 1.; + let r: f32 = transmute(vdups_lane_f32::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtas_s32_f32() { - let a: f32 = 2.9; - let e: i32 = 3; - let r: i32 = transmute(vcvtas_s32_f32(transmute(a))); + unsafe fn test_vdups_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32 = 1.; + let r: f32 = transmute(vdups_laneq_f32::<2>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtad_s64_f64() { - let a: f64 = 2.9; - let e: i64 = 3; - let r: i64 = transmute(vcvtad_s64_f64(transmute(a))); + unsafe fn test_vdupd_lane_f64() { + let a: f64 = 1.; + let e: f64 = 1.; + let r: f64 = transmute(vdupd_lane_f64::<0>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtas_u32_f32() { - let a: f32 = 2.9; - let e: u32 = 3; - let r: u32 = transmute(vcvtas_u32_f32(transmute(a))); + unsafe fn test_vdupd_laneq_f64() { + let a: f64x2 = f64x2::new(1., 1.); + let e: f64 = 1.; + let r: f64 = transmute(vdupd_laneq_f64::<1>(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtad_u64_f64() { - let a: f64 = 2.9; - let e: u64 = 3; - let r: u64 = transmute(vcvtad_u64_f64(transmute(a))); + unsafe fn test_vextq_p64() { + let a: i64x2 = i64x2::new(0, 8); + let b: i64x2 = i64x2::new(9, 11); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_s32_f32() { - let a: f32x2 = f32x2::new(-1.5, 2.1); - let e: i32x2 = i32x2::new(-2, 2); - let r: i32x2 = transmute(vcvtn_s32_f32(transmute(a))); + unsafe fn test_vextq_f64() { + let a: f64x2 = f64x2::new(0., 2.); + let b: f64x2 = f64x2::new(3., 4.); + let e: f64x2 = f64x2::new(2., 3.); + let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_s32_f32() { - let a: f32x4 = f32x4::new(-1.5, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-2, 2, -3, 4); - let r: i32x4 = transmute(vcvtnq_s32_f32(transmute(a))); + unsafe fn test_vmla_f64() { + let a: f64 = 0.; + let b: f64 = 2.; + let c: f64 = 3.; + let e: f64 = 6.; + let r: f64 = 
transmute(vmla_f64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_s64_f64() { - let a: f64 = -1.5; - let e: i64x1 = i64x1::new(-2); - let r: i64x1 = transmute(vcvtn_s64_f64(transmute(a))); + unsafe fn test_vmlaq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let b: f64x2 = f64x2::new(2., 2.); + let c: f64x2 = f64x2::new(3., 3.); + let e: f64x2 = f64x2::new(6., 7.); + let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_s64_f64() { - let a: f64x2 = f64x2::new(-1.5, 2.1); - let e: i64x2 = i64x2::new(-2, 2); - let r: i64x2 = transmute(vcvtnq_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_s8() { + let a: i16x8 = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); + let e: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let r: i16x8 = transmute(vmlal_high_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtns_s32_f32() { - let a: f32 = -1.5; - let e: i32 = -2; - let r: i32 = transmute(vcvtns_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtnd_s64_f64() { - let a: f64 = -1.5; - let e: i64 = -2; - let r: i64 = transmute(vcvtnd_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 0, 1); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-2, 2); - let r: i32x2 = transmute(vcvtm_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_u8() { + let a: u16x8 = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let r: u16x8 = transmute(vmlal_high_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-2, 2, -3, 3); - let r: i32x4 = transmute(vcvtmq_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-2); - let r: i64x1 = transmute(vcvtm_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = 
u32x4::new(3, 3, 0, 1); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-2, 2); - let r: i64x2 = transmute(vcvtmq_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_n_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16 = 2; + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtms_s32_f32() { - let a: f32 = -1.1; - let e: i32 = -2; - let r: i32 = transmute(vcvtms_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_n_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32 = 2; + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmd_s64_f64() { - let a: f64 = -1.1; - let e: i64 = -2; - let r: i64 = transmute(vcvtmd_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_n_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16 = 2; + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 3); - let r: i32x2 = transmute(vcvtp_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_n_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32 = 2; + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 3, -2, 4); - let r: i32x4 = transmute(vcvtpq_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_lane_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvtp_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_laneq_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 3); - let r: i64x2 = transmute(vcvtpq_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_lane_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x2 = i32x2::new(0, 2); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = 
transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtps_s32_f32() { - let a: f32 = -1.1; - let e: i32 = -1; - let r: i32 = transmute(vcvtps_s32_f32(transmute(a))); + unsafe fn test_vmlal_high_laneq_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpd_s64_f64() { - let a: f64 = -1.1; - let e: i64 = -1; - let r: i64 = transmute(vcvtpd_s64_f64(transmute(a))); + unsafe fn test_vmlal_high_lane_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvta_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvta_u32_f32(transmute(a))); + unsafe fn test_vmlal_high_laneq_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtaq_u32_f32(transmute(a))); + unsafe fn test_vmlal_high_lane_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x2 = u32x2::new(0, 2); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvta_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvta_u64_f64(transmute(a))); + unsafe fn test_vmlal_high_laneq_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtaq_u64_f64(transmute(a))); + unsafe fn test_vmls_f64() { + let a: f64 = 6.; + let b: f64 = 2.; + let c: f64 = 3.; + let e: f64 = 0.; + let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_u32_f32() { - let a: f32x2 = f32x2::new(1.5, 2.1); - let e: u32x2 = u32x2::new(2, 2); - let r: u32x2 = transmute(vcvtn_u32_f32(transmute(a))); + unsafe fn test_vmlsq_f64() { + let a: f64x2 = f64x2::new(6., 7.); + let b: f64x2 = f64x2::new(2., 2.); + let c: f64x2 = f64x2::new(3., 3.); + let e: f64x2 = f64x2::new(0., 1.); + let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn 
test_vcvtnq_u32_f32() { - let a: f32x4 = f32x4::new(1.5, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(2, 2, 3, 4); - let r: u32x4 = transmute(vcvtnq_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_s8() { + let a: i16x8 = i16x8::new(14, 15, 16, 17, 18, 19, 20, 21); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); + let e: i16x8 = i16x8::new(14, 13, 12, 11, 10, 9, 8, 7); + let r: i16x8 = transmute(vmlsl_high_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_u64_f64() { - let a: f64 = 1.5; - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vcvtn_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_u64_f64() { - let a: f64x2 = f64x2::new(1.5, 2.1); - let e: u64x2 = u64x2::new(2, 2); - let r: u64x2 = transmute(vcvtnq_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 0, 1); + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtns_u32_f32() { - let a: f32 = 1.5; - let e: u32 = 2; - let r: u32 = transmute(vcvtns_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_u8() { + let a: u16x8 = u16x8::new(14, 15, 16, 17, 18, 19, 20, 21); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(14, 13, 12, 11, 10, 9, 8, 7); + let r: u16x8 = transmute(vmlsl_high_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtnd_u64_f64() { - let a: f64 = 1.5; - let e: u64 = 2; - let r: u64 = transmute(vcvtnd_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvtm_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(3, 3, 0, 1); + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 2, 3); - let r: u32x4 = transmute(vcvtmq_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_n_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16 = 2; + let e: i32x4 = 
i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvtm_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_n_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32 = 2; + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtmq_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_n_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16 = 2; + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtms_u32_f32() { - let a: f32 = 1.1; - let e: u32 = 1; - let r: u32 = transmute(vcvtms_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_n_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32 = 2; + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtmd_u64_f64() { - let a: f64 = 1.1; - let e: u64 = 1; - let r: u64 = transmute(vcvtmd_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_lane_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(2, 3); - let r: u32x2 = transmute(vcvtp_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_laneq_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(2, 3, 3, 4); - let r: u32x4 = transmute(vcvtpq_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_lane_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x2 = i32x2::new(0, 2); + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vcvtp_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_laneq_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), 
transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(2, 3); - let r: u64x2 = transmute(vcvtpq_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_lane_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtps_u32_f32() { - let a: f32 = 1.1; - let e: u32 = 2; - let r: u32 = transmute(vcvtps_u32_f32(transmute(a))); + unsafe fn test_vmlsl_high_laneq_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcvtpd_u64_f64() { - let a: f64 = 1.1; - let e: u64 = 2; - let r: u64 = transmute(vcvtpd_u64_f64(transmute(a))); + unsafe fn test_vmlsl_high_lane_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x2 = u32x2::new(0, 2); + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_laneq_p64::<1>(transmute(a))); + unsafe fn test_vmlsl_high_laneq_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p64() { - let a: i64x1 = i64x1::new(1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_lane_p64::<0>(transmute(a))); + unsafe fn test_vmovn_high_s16() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5); + let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15); + let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15); + let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(1., 1.); - let r: f64x2 = transmute(vdupq_laneq_f64::<1>(transmute(a))); + unsafe fn test_vmovn_high_s32() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 3, 4, 5); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5); + let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b))); assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_f64() { - let a: f64 = 1.; - let e: f64x2 = f64x2::new(1., 1.); - let r: f64x2 = transmute(vdupq_lane_f64::<0>(transmute(a))); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_high_s64() { + let a: i32x2 = i32x2::new(0, 1); + let b: i64x2 = i64x2::new(2, 3); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable 
= "neon")] - unsafe fn test_vdup_lane_p64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vdup_lane_p64::<0>(transmute(a))); + unsafe fn test_vmovn_high_u16() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5); + let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15); + let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_f64() { - let a: f64 = 0.; - let e: f64 = 0.; - let r: f64 = transmute(vdup_lane_f64::<0>(transmute(a))); + unsafe fn test_vmovn_high_u32() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 3, 4, 5); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5); + let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vdup_laneq_p64::<1>(transmute(a))); + unsafe fn test_vmovn_high_u64() { + let a: u32x2 = u32x2::new(0, 1); + let b: u64x2 = u64x2::new(2, 3); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_f64() { - let a: f64x2 = f64x2::new(0., 1.); - let e: f64 = 1.; - let r: f64 = transmute(vdup_laneq_f64::<1>(transmute(a))); + unsafe fn test_vneg_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vneg_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8 = 1; - let r: i8 = transmute(vdupb_lane_s8::<4>(transmute(a))); + unsafe fn test_vnegq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x2 = i64x2::new(0, -1); + let r: i64x2 = transmute(vnegq_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8 = 1; - let r: i8 = transmute(vdupb_laneq_s8::<8>(transmute(a))); + unsafe fn test_vnegd_s64() { + let a: i64 = 1; + let e: i64 = -1; + let r: i64 = transmute(vnegd_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16 = 1; - let r: i16 = transmute(vduph_lane_s16::<2>(transmute(a))); + unsafe fn test_vneg_f64() { + let a: f64 = 0.; + let e: f64 = 0.; + let r: f64 = transmute(vneg_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16 = 1; - let r: i16 = transmute(vduph_laneq_s16::<4>(transmute(a))); + unsafe fn test_vnegq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let e: f64x2 = f64x2::new(0., -1.); + let r: f64x2 = transmute(vnegq_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let e: i32 = 1; - let r: i32 = transmute(vdups_lane_s32::<1>(transmute(a))); + unsafe fn test_vqneg_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let r: i64x1 = transmute(vqneg_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = 
"neon")] - unsafe fn test_vdups_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32 = 1; - let r: i32 = transmute(vdups_laneq_s32::<2>(transmute(a))); + unsafe fn test_vqnegq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0); + let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0); + let r: i64x2 = transmute(vqnegq_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64 = 1; - let r: i64 = transmute(vdupd_lane_s64::<0>(transmute(a))); + unsafe fn test_vqnegb_s8() { + let a: i8 = 1; + let e: i8 = -1; + let r: i8 = transmute(vqnegb_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupd_laneq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64 = 1; - let r: i64 = transmute(vdupd_laneq_s64::<1>(transmute(a))); + unsafe fn test_vqnegh_s16() { + let a: i16 = 1; + let e: i16 = -1; + let r: i16 = transmute(vqnegh_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8 = 1; - let r: u8 = transmute(vdupb_lane_u8::<4>(transmute(a))); + unsafe fn test_vqnegs_s32() { + let a: i32 = 1; + let e: i32 = -1; + let r: i32 = transmute(vqnegs_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8 = 1; - let r: u8 = transmute(vdupb_laneq_u8::<8>(transmute(a))); + unsafe fn test_vqnegd_s64() { + let a: i64 = 1; + let e: i64 = -1; + let r: i64 = transmute(vqnegd_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16 = 1; - let r: u16 = transmute(vduph_lane_u16::<2>(transmute(a))); + unsafe fn test_vqsubb_s8() { + let a: i8 = 42; + let b: i8 = 1; + let e: i8 = 41; + let r: i8 = transmute(vqsubb_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16 = 1; - let r: u16 = transmute(vduph_laneq_u16::<4>(transmute(a))); + unsafe fn test_vqsubh_s16() { + let a: i16 = 42; + let b: i16 = 1; + let e: i16 = 41; + let r: i16 = transmute(vqsubh_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32 = 1; - let r: u32 = transmute(vdups_lane_u32::<1>(transmute(a))); + unsafe fn test_vqsubb_u8() { + let a: u8 = 42; + let b: u8 = 1; + let e: u8 = 41; + let r: u8 = transmute(vqsubb_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdups_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32 = 1; - let r: u32 = transmute(vdups_laneq_u32::<2>(transmute(a))); + unsafe fn test_vqsubh_u16() { + let a: u16 = 42; + let b: u16 = 1; + let e: u16 = 41; + let r: u16 = transmute(vqsubh_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64 = 1; - let r: u64 = transmute(vdupd_lane_u64::<0>(transmute(a))); + unsafe fn test_vqsubs_u32() { + let a: u32 = 42; + let b: u32 = 1; + let e: u32 = 41; + let r: u32 = transmute(vqsubs_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn 
test_vdupd_laneq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let e: u64 = 1; - let r: u64 = transmute(vdupd_laneq_u64::<1>(transmute(a))); + unsafe fn test_vqsubd_u64() { + let a: u64 = 42; + let b: u64 = 1; + let e: u64 = 41; + let r: u64 = transmute(vqsubd_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: p8 = 1; - let r: p8 = transmute(vdupb_lane_p8::<4>(transmute(a))); + unsafe fn test_vqsubs_s32() { + let a: i32 = 42; + let b: i32 = 1; + let e: i32 = 41; + let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: p8 = 1; - let r: p8 = transmute(vdupb_laneq_p8::<8>(transmute(a))); + unsafe fn test_vqsubd_s64() { + let a: i64 = 42; + let b: i64 = 1; + let e: i64 = 41; + let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: p16 = 1; - let r: p16 = transmute(vduph_lane_p16::<2>(transmute(a))); + unsafe fn test_vraddhn_high_s16() { + let a: i8x8 = i8x8::new(1, 2, 0, 0, 0, 0, 0, 0); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let c: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vraddhn_high_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: p16 = 1; - let r: p16 = transmute(vduph_laneq_p16::<4>(transmute(a))); + unsafe fn test_vraddhn_high_s32() { + let a: i16x4 = i16x4::new(1, 2, 0, 0); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let c: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i16x8 = i16x8::new(1, 2, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vraddhn_high_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32 = 1.; - let r: f32 = transmute(vdups_lane_f32::<1>(transmute(a))); + unsafe fn test_vraddhn_high_s64() { + let a: i32x2 = i32x2::new(1, 2); + let b: i64x2 = i64x2::new(1, 2); + let c: i64x2 = i64x2::new(1, 2); + let e: i32x4 = i32x4::new(1, 2, 0, 0); + let r: i32x4 = transmute(vraddhn_high_s64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdups_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32 = 1.; - let r: f32 = transmute(vdups_laneq_f32::<2>(transmute(a))); + unsafe fn test_vraddhn_high_u16() { + let a: u8x8 = u8x8::new(1, 2, 0, 0, 0, 0, 0, 0); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let c: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x16 = u8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vraddhn_high_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_f64() { - let a: f64 = 1.; - let e: f64 = 1.; - let r: f64 = transmute(vdupd_lane_f64::<0>(transmute(a))); + unsafe fn test_vraddhn_high_u32() { + let a: u16x4 = u16x4::new(1, 2, 0, 0); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let c: u32x4 = u32x4::new(1, 2, 3, 4); + let e: 
u16x8 = u16x8::new(1, 2, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vraddhn_high_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupd_laneq_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let e: f64 = 1.; - let r: f64 = transmute(vdupd_laneq_f64::<1>(transmute(a))); + unsafe fn test_vraddhn_high_u64() { + let a: u32x2 = u32x2::new(1, 2); + let b: u64x2 = u64x2::new(1, 2); + let c: u64x2 = u64x2::new(1, 2); + let e: u32x4 = u32x4::new(1, 2, 0, 0); + let r: u32x4 = transmute(vraddhn_high_u64(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_p64() { - let a: i64x2 = i64x2::new(0, 8); - let b: i64x2 = i64x2::new(9, 11); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b))); + unsafe fn test_vrbit_s8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112); + let r: i8x8 = transmute(vrbit_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_f64() { - let a: f64x2 = f64x2::new(0., 2.); - let b: f64x2 = f64x2::new(3., 4.); - let e: f64x2 = f64x2::new(2., 3.); - let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b))); + unsafe fn test_vrbitq_s8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); + let r: i8x16 = transmute(vrbitq_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_f64() { - let a: f64 = 0.; - let b: f64 = 2.; - let c: f64 = 3.; - let e: f64 = 6.; - let r: f64 = transmute(vmla_f64(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrbit_u8() { + let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112); + let r: u8x8 = transmute(vrbit_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_f64() { - let a: f64x2 = f64x2::new(0., 1.); - let b: f64x2 = f64x2::new(2., 2.); - let c: f64x2 = f64x2::new(3., 3.); - let e: f64x2 = f64x2::new(6., 7.); - let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrbitq_u8() { + let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); + let r: u8x16 = transmute(vrbitq_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s8() { - let a: i16x8 = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r: i16x8 = transmute(vmlal_high_s8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrbit_p8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112); + let r: i8x8 = transmute(vrbit_p8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = 
transmute(vmlal_high_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrbitq_p8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); + let r: i8x16 = transmute(vrbitq_p8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(3, 3, 0, 1); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndx_f32() { + let a: f32x2 = f32x2::new(-1.5, 0.5); + let e: f32x2 = f32x2::new(-2.0, 0.0); + let r: f32x2 = transmute(vrndx_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_u8() { - let a: u16x8 = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r: u16x8 = transmute(vmlal_high_u8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndxq_f32() { + let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); + let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); + let r: f32x4 = transmute(vrndxq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: u32x4 = u32x4::new(8, 9, 10, 11); - let r: u32x4 = transmute(vmlal_high_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndx_f64() { + let a: f64 = -1.5; + let e: f64 = -2.0; + let r: f64 = transmute(vrndx_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_u32() { - let a: u64x2 = u64x2::new(8, 7); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 0, 1); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vmlal_high_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndxq_f64() { + let a: f64x2 = f64x2::new(-1.5, 0.5); + let e: f64x2 = f64x2::new(-2.0, 0.0); + let r: f64x2 = transmute(vrndxq_f64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16 = 2; - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrnda_f32() { + let a: f32x2 = f32x2::new(-1.5, 0.5); + let e: f32x2 = f32x2::new(-2.0, 1.0); + let r: f32x2 = transmute(vrnda_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32 = 2; - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vrndaq_f32() { + let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); + let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0); + let r: f32x4 = transmute(vrndaq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = 
u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16 = 2;
-        let e: u32x4 = u32x4::new(8, 9, 10, 11);
-        let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrnda_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrnda_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_n_u32() {
-        let a: u64x2 = u64x2::new(8, 7);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32 = 2;
-        let e: u64x2 = u64x2::new(8, 9);
-        let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndaq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 1.0);
+        let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_lane_s16() {
-        let a: i32x4 = i32x4::new(8, 7, 6, 5);
-        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(8, 9, 10, 11);
-        let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndn_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndn_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_laneq_s16() {
-        let a: i32x4 = i32x4::new(8, 7, 6, 5);
-        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(8, 9, 10, 11);
-        let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndnq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_lane_s32() {
-        let a: i64x2 = i64x2::new(8, 7);
-        let b: i32x4 = i32x4::new(3, 3, 0, 1);
-        let c: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(8, 9);
-        let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndns_f32() {
+        let a: f32 = -1.5;
+        let e: f32 = -2.0;
+        let r: f32 = transmute(vrndns_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_laneq_s32() {
-        let a: i64x2 = i64x2::new(8, 7);
-        let b: i32x4 = i32x4::new(3, 3, 0, 1);
-        let c: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(8, 9);
-        let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndm_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndm_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_lane_u16() {
-        let a: u32x4 = u32x4::new(8, 7, 6, 5);
-        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u32x4 = u32x4::new(8, 9, 10, 11);
-        let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndmq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
+        let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_laneq_u16() {
-        let a: u32x4 = u32x4::new(8, 7, 6, 5);
-        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u32x4 = u32x4::new(8, 9, 10, 11);
-        let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndm_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndm_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_lane_u32() {
-        let a: u64x2 = u64x2::new(8, 7);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32x2 = u32x2::new(0, 2);
-        let e: u64x2 = u64x2::new(8, 9);
-        let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndmq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlal_high_laneq_u32() {
-        let a: u64x2 = u64x2::new(8, 7);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u64x2 = u64x2::new(8, 9);
-        let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndp_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-1.0, 1.0);
+        let r: f32x2 = transmute(vrndp_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmls_f64() {
-        let a: f64 = 6.;
-        let b: f64 = 2.;
-        let c: f64 = 3.;
-        let e: f64 = 0.;
-        let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndpq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
+        let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsq_f64() {
-        let a: f64x2 = f64x2::new(6., 7.);
-        let b: f64x2 = f64x2::new(2., 2.);
-        let c: f64x2 = f64x2::new(3., 3.);
-        let e: f64x2 = f64x2::new(0., 1.);
-        let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndp_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -1.0;
+        let r: f64 = transmute(vrndp_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_s8() {
-        let a: i16x8 = i16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
-        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
-        let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
-        let e: i16x8 = i16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
-        let r: i16x8 = transmute(vmlsl_high_s8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndpq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-1.0, 1.0);
+        let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_s16() {
-        let a: i32x4 = i32x4::new(14, 15, 16, 17);
-        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
-        let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let e: i32x4 = i32x4::new(14, 13, 12, 11);
-        let r: i32x4 = transmute(vmlsl_high_s16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrnd_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-1.0, 0.0);
+        let r: f32x2 = transmute(vrnd_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_s32() {
-        let a: i64x2 = i64x2::new(14, 15);
-        let b: i32x4 = i32x4::new(2, 2, 2, 2);
-        let c: i32x4 = i32x4::new(3, 3, 0, 1);
-        let e: i64x2 = i64x2::new(14, 13);
-        let r: i64x2 = transmute(vmlsl_high_s32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
+        let r: f32x4 = transmute(vrndq_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_u8() {
-        let a: u16x8 = u16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
-        let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
-        let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
-        let e: u16x8 = u16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
-        let r: u16x8 = transmute(vmlsl_high_u8(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrnd_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -1.0;
+        let r: f64 = transmute(vrnd_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_u16() {
-        let a: u32x4 = u32x4::new(14, 15, 16, 17);
-        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
-        let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let e: u32x4 = u32x4::new(14, 13, 12, 11);
-        let r: u32x4 = transmute(vmlsl_high_u16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-1.0, 0.0);
+        let r: f64x2 = transmute(vrndq_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_u32() {
-        let a: u64x2 = u64x2::new(14, 15);
-        let b: u32x4 = u32x4::new(2, 2, 2, 2);
-        let c: u32x4 = u32x4::new(3, 3, 0, 1);
-        let e: u64x2 = u64x2::new(14, 13);
-        let r: u64x2 = transmute(vmlsl_high_u32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndi_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndi_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_n_s16() {
-        let a: i32x4 = i32x4::new(14, 15, 16, 17);
-        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: i16 = 2;
-        let e: i32x4 = i32x4::new(14, 13, 12, 11);
-        let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndiq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_n_s32() {
-        let a: i64x2 = i64x2::new(14, 15);
-        let b: i32x4 = i32x4::new(3, 3, 0, 1);
-        let c: i32 = 2;
-        let e: i64x2 = i64x2::new(14, 13);
-        let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndi_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndi_f64(transmute(a)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_n_u16() {
-        let a: u32x4 = u32x4::new(14, 15, 16, 17);
-        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16 = 2;
-        let e: u32x4 = u32x4::new(14, 13, 12, 11);
-        let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vrndiq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
         assert_eq!(r, e);
     }
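[Editor's note, not part of the generated patch: the vrnd* tests above cover the AArch64 FRINT* rounding modes. A scalar Rust sketch of the assumed semantics, using std float methods (`round_ties_even` is stable since Rust 1.77); the mapping to instructions is an assumption for review, not taken from this diff:

    let x: f64 = -1.5;
    assert_eq!(x.round(), -2.0);           // vrnda / FRINTA: to nearest, ties away from zero (0.5 -> 1.0)
    assert_eq!(x.round_ties_even(), -2.0); // vrndn / FRINTN: to nearest, ties to even (0.5 -> 0.0)
    assert_eq!(x.floor(), -2.0);           // vrndm / FRINTM: toward minus infinity
    assert_eq!(x.ceil(), -1.0);            // vrndp / FRINTP: toward plus infinity
    assert_eq!(x.trunc(), -1.0);           // vrnd  / FRINTZ: toward zero
    // vrndi / vrndx (FRINTI / FRINTX) follow the current FPCR rounding mode;
    // the tests assume the default round-to-nearest, hence -1.5 -> -2.0.
]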
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_n_u32() {
-        let a: u64x2 = u64x2::new(14, 15);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32 = 2;
-        let e: u64x2 = u64x2::new(14, 13);
-        let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddb_s8() {
+        let a: i8 = 42;
+        let b: i8 = 1;
+        let e: i8 = 43;
+        let r: i8 = transmute(vqaddb_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_lane_s16() {
-        let a: i32x4 = i32x4::new(14, 15, 16, 17);
-        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(14, 13, 12, 11);
-        let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddh_s16() {
+        let a: i16 = 42;
+        let b: i16 = 1;
+        let e: i16 = 43;
+        let r: i16 = transmute(vqaddh_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_laneq_s16() {
-        let a: i32x4 = i32x4::new(14, 15, 16, 17);
-        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(14, 13, 12, 11);
-        let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddb_u8() {
+        let a: u8 = 42;
+        let b: u8 = 1;
+        let e: u8 = 43;
+        let r: u8 = transmute(vqaddb_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_lane_s32() {
-        let a: i64x2 = i64x2::new(14, 15);
-        let b: i32x4 = i32x4::new(3, 3, 0, 1);
-        let c: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(14, 13);
-        let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddh_u16() {
+        let a: u16 = 42;
+        let b: u16 = 1;
+        let e: u16 = 43;
+        let r: u16 = transmute(vqaddh_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_laneq_s32() {
-        let a: i64x2 = i64x2::new(14, 15);
-        let b: i32x4 = i32x4::new(3, 3, 0, 1);
-        let c: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(14, 13);
-        let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqadds_u32() {
+        let a: u32 = 42;
+        let b: u32 = 1;
+        let e: u32 = 43;
+        let r: u32 = transmute(vqadds_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_lane_u16() {
-        let a: u32x4 = u32x4::new(14, 15, 16, 17);
-        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u32x4 = u32x4::new(14, 13, 12, 11);
-        let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddd_u64() {
+        let a: u64 = 42;
+        let b: u64 = 1;
+        let e: u64 = 43;
+        let r: u64 = transmute(vqaddd_u64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_laneq_u16() {
-        let a: u32x4 = u32x4::new(14, 15, 16, 17);
-        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
-        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u32x4 = u32x4::new(14, 13, 12, 11);
-        let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqadds_s32() {
+        let a: i32 = 42;
+        let b: i32 = 1;
+        let e: i32 = 43;
+        let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_lane_u32() {
-        let a: u64x2 = u64x2::new(14, 15);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32x2 = u32x2::new(0, 2);
-        let e: u64x2 = u64x2::new(14, 13);
-        let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vqaddd_s64() {
+        let a: i64 = 42;
+        let b: i64 = 1;
+        let e: i64 = 43;
+        let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmlsl_high_laneq_u32() {
-        let a: u64x2 = u64x2::new(14, 15);
-        let b: u32x4 = u32x4::new(3, 3, 0, 1);
-        let c: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u64x2 = u64x2::new(14, 13);
-        let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vld1_f64_x2() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_s16() {
-        let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
-        let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
-        let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
-        let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vld1q_f64_x2() {
+        let a: [f64; 5] = [0., 1., 2., 3., 4.];
+        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)];
+        let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_s32() {
-        let a: i16x4 = i16x4::new(0, 1, 2, 3);
-        let b: i32x4 = i32x4::new(2, 3, 4, 5);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
-        let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vld1_f64_x3() {
+        let a: [f64; 4] = [0., 1., 2., 3.];
+        let e: [f64; 3] = [1., 2., 3.];
+        let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_s64() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let b: i64x2 = i64x2::new(2, 3);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b)));
+    unsafe fn test_vld1q_f64_x3() {
+        let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
+        let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)];
+        let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_u16() {
-        let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
-        let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
-        let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
-        let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vld1_f64_x4() {
+        let a: [f64; 5] = [0., 1., 2., 3., 4.];
+        let e: [f64; 4] = [1., 2., 3., 4.];
+        let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_u32() {
-        let a: u16x4 = u16x4::new(0, 1, 2, 3);
-        let b: u32x4 = u32x4::new(2, 3, 4, 5);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
-        let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vld1q_f64_x4() {
+        let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
+        let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)];
+        let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
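[Editor's note, not part of the generated patch: the scalar vqadd{b,h,s,d}_* tests above only exercise the non-saturating case 42 + 1. The defining behavior of these intrinsics is saturation at the type bounds, which is assumed here to match Rust's `saturating_add`:

    assert_eq!(i8::MAX.saturating_add(1), i8::MAX);   // vqaddb_s8(127, 1) should yield 127
    assert_eq!(i8::MIN.saturating_add(-1), i8::MIN);  // vqaddb_s8(-128, -1) should yield -128
    assert_eq!(u8::MAX.saturating_add(1), u8::MAX);   // vqaddb_u8(255, 1) should yield 255
]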
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_high_u64() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let b: u64x2 = u64x2::new(2, 3);
-        let e: u32x4 = u32x4::new(0, 1, 2, 3);
-        let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_s64() {
+        let a: [i64; 5] = [0, 1, 2, 2, 3];
+        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
+        let r: [i64x2; 2] = transmute(vld2q_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vneg_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vneg_s64(transmute(a)));
+    unsafe fn test_vld2q_u64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 3)];
+        let r: [u64x2; 2] = transmute(vld2q_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vnegq_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: i64x2 = i64x2::new(0, -1);
-        let r: i64x2 = transmute(vnegq_s64(transmute(a)));
+    unsafe fn test_vld2q_p64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
+        let r: [i64x2; 2] = transmute(vld2q_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vneg_f64() {
-        let a: f64 = 0.;
-        let e: f64 = 0.;
-        let r: f64 = transmute(vneg_f64(transmute(a)));
+    unsafe fn test_vld2_f64() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let r: [f64; 2] = transmute(vld2_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vnegq_f64() {
-        let a: f64x2 = f64x2::new(0., 1.);
-        let e: f64x2 = f64x2::new(0., -1.);
-        let r: f64x2 = transmute(vnegq_f64(transmute(a)));
+    unsafe fn test_vld2q_f64() {
+        let a: [f64; 5] = [0., 1., 2., 2., 3.];
+        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 3.)];
+        let r: [f64x2; 2] = transmute(vld2q_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqneg_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vqneg_s64(transmute(a)));
+    unsafe fn test_vld2q_dup_s64() {
+        let a: [i64; 5] = [0, 1, 1, 2, 3];
+        let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 2] = transmute(vld2q_dup_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqnegq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, 0);
-        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0);
-        let r: i64x2 = transmute(vqnegq_s64(transmute(a)));
+    unsafe fn test_vld2q_dup_u64() {
+        let a: [u64; 5] = [0, 1, 1, 2, 3];
+        let e: [u64x2; 2] = [u64x2::new(1, 1), u64x2::new(1, 1)];
+        let r: [u64x2; 2] = transmute(vld2q_dup_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubb_s8() {
-        let a: i8 = 42;
-        let b: i8 = 1;
-        let e: i8 = 41;
-        let r: i8 = transmute(vqsubb_s8(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_dup_p64() {
+        let a: [u64; 5] = [0, 1, 1, 2, 3];
+        let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 2] = transmute(vld2q_dup_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubh_s16() {
-        let a: i16 = 42;
-        let b: i16 = 1;
-        let e: i16 = 41;
-        let r: i16 = transmute(vqsubh_s16(transmute(a), transmute(b)));
+    unsafe fn test_vld2_dup_f64() {
+        let a: [f64; 3] = [0., 1., 1.];
+        let e: [f64; 2] = [1., 1.];
+        let r: [f64; 2] = transmute(vld2_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubb_u8() {
-        let a: u8 = 42;
-        let b: u8 = 1;
-        let e: u8 = 41;
-        let r: u8 = transmute(vqsubb_u8(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_dup_f64() {
+        let a: [f64; 5] = [0., 1., 1., 2., 3.];
+        let e: [f64x2; 2] = [f64x2::new(1., 1.), f64x2::new(1., 1.)];
+        let r: [f64x2; 2] = transmute(vld2q_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubh_u16() {
-        let a: u16 = 42;
-        let b: u16 = 1;
-        let e: u16 = 41;
-        let r: u16 = transmute(vqsubh_u16(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_lane_s8() {
+        let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let r: [i8x16; 2] = transmute(vld2q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubs_u32() {
-        let a: u32 = 42;
-        let b: u32 = 1;
-        let e: u32 = 41;
-        let r: u32 = transmute(vqsubs_u32(transmute(a), transmute(b)));
+    unsafe fn test_vld2_lane_s64() {
+        let a: [i64; 3] = [0, 1, 2];
+        let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
+        let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
+        let r: [i64x1; 2] = transmute(vld2_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubd_u64() {
-        let a: u64 = 42;
-        let b: u64 = 1;
-        let e: u64 = 41;
-        let r: u64 = transmute(vqsubd_u64(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_lane_s64() {
+        let a: [i64; 5] = [0, 1, 2, 3, 4];
+        let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
+        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
+        let r: [i64x2; 2] = transmute(vld2q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubs_s32() {
-        let a: i32 = 42;
-        let b: i32 = 1;
-        let e: i32 = 41;
-        let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
+    unsafe fn test_vld2_lane_p64() {
+        let a: [u64; 3] = [0, 1, 2];
+        let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
+        let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
+        let r: [i64x1; 2] = transmute(vld2_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubd_s64() {
-        let a: i64 = 42;
-        let b: i64 = 1;
-        let e: i64 = 41;
-        let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
+    unsafe fn test_vld2q_lane_p64() {
+        let a: [u64; 5] = [0, 1, 2, 3, 4];
+        let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
+        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
+        let r: [i64x2; 2] = transmute(vld2q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbit_s8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
-        let r: i8x8 = transmute(vrbit_s8(transmute(a)));
+    unsafe fn test_vld2q_lane_u8() {
+        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [u8x16; 2] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let r: [u8x16; 2] = transmute(vld2q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbitq_s8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
-        let r: i8x16 = transmute(vrbitq_s8(transmute(a)));
+    unsafe fn test_vld2_lane_u64() {
+        let a: [u64; 3] = [0, 1, 2];
+        let b: [u64x1; 2] = [u64x1::new(0), u64x1::new(2)];
+        let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)];
+        let r: [u64x1; 2] = transmute(vld2_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbit_u8() {
-        let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
-        let r: u8x8 = transmute(vrbit_u8(transmute(a)));
+    unsafe fn test_vld2q_lane_u64() {
+        let a: [u64; 5] = [0, 1, 2, 3, 4];
+        let b: [u64x2; 2] = [u64x2::new(0, 2), u64x2::new(2, 14)];
+        let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 14)];
+        let r: [u64x2; 2] = transmute(vld2q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbitq_u8() {
-        let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
-        let r: u8x16 = transmute(vrbitq_u8(transmute(a)));
+    unsafe fn test_vld2q_lane_p8() {
+        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+        let r: [i8x16; 2] = transmute(vld2q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbit_p8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
-        let r: i8x8 = transmute(vrbit_p8(transmute(a)));
+    unsafe fn test_vld2_lane_f64() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let b: [f64; 2] = [0., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let r: [f64; 2] = transmute(vld2_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrbitq_p8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
-        let r: i8x16 = transmute(vrbitq_p8(transmute(a)));
+    unsafe fn test_vld2q_lane_f64() {
+        let a: [f64; 5] = [0., 1., 2., 3., 4.];
+        let b: [f64x2; 2] = [f64x2::new(0., 2.), f64x2::new(2., 14.)];
+        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 14.)];
+        let r: [f64x2; 2] = transmute(vld2q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
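[Editor's note, not part of the generated patch: the vld2* tests read from `a[1..].as_ptr()`, a pointer deliberately offset by one element, presumably so the loads are also exercised at addresses without full vector alignment. The de-interleave these intrinsics perform can be sketched in plain Rust (illustrative helper, not a stdarch API):

    // vld2q_s64 reads four i64 and splits alternating elements into two vectors.
    fn emulate_vld2q_s64(mem: &[i64; 4]) -> ([i64; 2], [i64; 2]) {
        ([mem[0], mem[2]], [mem[1], mem[3]])
    }
    // With mem = [1, 2, 2, 3] this yields ([1, 2], [2, 3]), matching test_vld2q_s64.
]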
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndx_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 0.0);
-        let r: f32x2 = transmute(vrndx_f32(transmute(a)));
+    unsafe fn test_vld3q_s64() {
+        let a: [i64; 7] = [0, 1, 2, 2, 2, 4, 4];
+        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
+        let r: [i64x2; 3] = transmute(vld3q_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndxq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
-        let r: f32x4 = transmute(vrndxq_f32(transmute(a)));
+    unsafe fn test_vld3q_u64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
+        let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 4), u64x2::new(2, 4)];
+        let r: [u64x2; 3] = transmute(vld3q_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndx_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrndx_f64(transmute(a)));
+    unsafe fn test_vld3q_p64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
+        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
+        let r: [i64x2; 3] = transmute(vld3q_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndxq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-2.0, 0.0);
-        let r: f64x2 = transmute(vrndxq_f64(transmute(a)));
+    unsafe fn test_vld3_f64() {
+        let a: [f64; 4] = [0., 1., 2., 2.];
+        let e: [f64; 3] = [1., 2., 2.];
+        let r: [f64; 3] = transmute(vld3_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrnda_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 1.0);
-        let r: f32x2 = transmute(vrnda_f32(transmute(a)));
+    unsafe fn test_vld3q_f64() {
+        let a: [f64; 7] = [0., 1., 2., 2., 2., 4., 4.];
+        let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 4.), f64x2::new(2., 4.)];
+        let r: [f64x2; 3] = transmute(vld3q_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndaq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0);
-        let r: f32x4 = transmute(vrndaq_f32(transmute(a)));
+    unsafe fn test_vld3q_dup_s64() {
+        let a: [i64; 7] = [0, 1, 1, 1, 3, 1, 4];
+        let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 3] = transmute(vld3q_dup_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrnda_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrnda_f64(transmute(a)));
+    unsafe fn test_vld3q_dup_u64() {
+        let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
+        let e: [u64x2; 3] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)];
+        let r: [u64x2; 3] = transmute(vld3q_dup_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndaq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-2.0, 1.0);
-        let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
+    unsafe fn test_vld3q_dup_p64() {
+        let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
+        let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 3] = transmute(vld3q_dup_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndn_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrndn_f64(transmute(a)));
+    unsafe fn test_vld3_dup_f64() {
+        let a: [f64; 4] = [0., 1., 1., 1.];
+        let e: [f64; 3] = [1., 1., 1.];
+        let r: [f64; 3] = transmute(vld3_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndnq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-2.0, 0.0);
-        let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
+    unsafe fn test_vld3q_dup_f64() {
+        let a: [f64; 7] = [0., 1., 1., 1., 3., 1., 4.];
+        let e: [f64x2; 3] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)];
+        let r: [f64x2; 3] = transmute(vld3q_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndm_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 0.0);
-        let r: f32x2 = transmute(vrndm_f32(transmute(a)));
+    unsafe fn test_vld3q_lane_s8() {
+        let a: [i8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let r: [i8x16; 3] = transmute(vld3q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndmq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
-        let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
+    unsafe fn test_vld3_lane_s64() {
+        let a: [i64; 4] = [0, 1, 2, 2];
+        let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)];
+        let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)];
+        let r: [i64x1; 3] = transmute(vld3_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndm_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrndm_f64(transmute(a)));
+    unsafe fn test_vld3q_lane_s64() {
+        let a: [i64; 7] = [0, 1, 2, 2, 4, 5, 6];
+        let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+        let r: [i64x2; 3] = transmute(vld3q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndmq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-2.0, 0.0);
-        let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
+    unsafe fn test_vld3_lane_p64() {
+        let a: [u64; 4] = [0, 1, 2, 2];
+        let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)];
+        let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)];
+        let r: [i64x1; 3] = transmute(vld3_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndp_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-1.0, 1.0);
-        let r: f32x2 = transmute(vrndp_f32(transmute(a)));
+    unsafe fn test_vld3q_lane_p64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6];
+        let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+        let r: [i64x2; 3] = transmute(vld3q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndpq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
-        let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
+    unsafe fn test_vld3q_lane_p8() {
+        let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let r: [i8x16; 3] = transmute(vld3q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndp_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -1.0;
-        let r: f64 = transmute(vrndp_f64(transmute(a)));
+    unsafe fn test_vld3q_lane_u8() {
+        let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+        let b: [u8x16; 3] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+        let r: [u8x16; 3] = transmute(vld3q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndpq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-1.0, 1.0);
-        let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
+    unsafe fn test_vld3_lane_u64() {
+        let a: [u64; 4] = [0, 1, 2, 2];
+        let b: [u64x1; 3] = [u64x1::new(0), u64x1::new(2), u64x1::new(2)];
+        let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)];
+        let r: [u64x1; 3] = transmute(vld3_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrnd_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-1.0, 0.0);
-        let r: f32x2 = transmute(vrnd_f32(transmute(a)));
+    unsafe fn test_vld3q_lane_u64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6];
+        let b: [u64x2; 3] = [u64x2::new(0, 2), u64x2::new(2, 14), u64x2::new(2, 16)];
+        let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 14), u64x2::new(2, 16)];
+        let r: [u64x2; 3] = transmute(vld3q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
-        let r: f32x4 = transmute(vrndq_f32(transmute(a)));
+    unsafe fn test_vld3_lane_f64() {
+        let a: [f64; 4] = [0., 1., 2., 2.];
+        let b: [f64; 3] = [0., 2., 2.];
+        let e: [f64; 3] = [1., 2., 2.];
+        let r: [f64; 3] = transmute(vld3_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrnd_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -1.0;
-        let r: f64 = transmute(vrnd_f64(transmute(a)));
+    unsafe fn test_vld3q_lane_f64() {
+        let a: [f64; 7] = [0., 1., 2., 2., 4., 5., 6.];
+        let b: [f64x2; 3] = [f64x2::new(0., 2.), f64x2::new(2., 14.), f64x2::new(9., 16.)];
+        let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 14.), f64x2::new(2., 16.)];
+        let r: [f64x2; 3] = transmute(vld3q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-1.0, 0.0);
-        let r: f64x2 = transmute(vrndq_f64(transmute(a)));
+    unsafe fn test_vld4q_s64() {
+        let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+        let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)];
+        let r: [i64x2; 4] = transmute(vld4q_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndi_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 0.0);
-        let r: f32x2 = transmute(vrndi_f32(transmute(a)));
+    unsafe fn test_vld4q_u64() {
+        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+        let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 6), u64x2::new(2, 6), u64x2::new(6, 8)];
+        let r: [u64x2; 4] = transmute(vld4q_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndiq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
-        let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
+    unsafe fn test_vld4q_p64() {
+        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+        let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)];
+        let r: [i64x2; 4] = transmute(vld4q_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndi_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrndi_f64(transmute(a)));
+    unsafe fn test_vld4_f64() {
+        let a: [f64; 5] = [0., 1., 2., 2., 6.];
+        let e: [f64; 4] = [1., 2., 2., 6.];
+        let r: [f64; 4] = transmute(vld4_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrndiq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 0.5);
-        let e: f64x2 = f64x2::new(-2.0, 0.0);
-        let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
+    unsafe fn test_vld4q_f64() {
+        let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
+        let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 6.), f64x2::new(2., 6.), f64x2::new(6., 8.)];
+        let r: [f64x2; 4] = transmute(vld4q_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddb_s8() {
-        let a: i8 = 42;
-        let b: i8 = 1;
-        let e: i8 = 43;
-        let r: i8 = transmute(vqaddb_s8(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_dup_s64() {
+        let a: [i64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+        let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 4] = transmute(vld4q_dup_s64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddh_s16() {
-        let a: i16 = 42;
-        let b: i16 = 1;
-        let e: i16 = 43;
-        let r: i16 = transmute(vqaddh_s16(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_dup_u64() {
+        let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+        let e: [u64x2; 4] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)];
+        let r: [u64x2; 4] = transmute(vld4q_dup_u64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
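[Editor's note, not part of the generated patch: sketches of the _dup and _lane load variants tested around here (illustrative helpers, not stdarch APIs). The _dup form broadcasts each of the N consecutive memory elements across a whole vector; the _lane form replaces only the selected lane of each input vector:

    fn emulate_vld3q_dup_s64(mem: &[i64; 3]) -> ([i64; 2], [i64; 2], [i64; 2]) {
        // Each loaded element is broadcast to every lane of its vector.
        ([mem[0]; 2], [mem[1]; 2], [mem[2]; 2])
    }
    fn emulate_vld2q_lane_s64<const LANE: usize>(
        mem: &[i64; 2],
        mut src: ([i64; 2], [i64; 2]),
    ) -> ([i64; 2], [i64; 2]) {
        // Only lane LANE of each vector is replaced from memory.
        src.0[LANE] = mem[0];
        src.1[LANE] = mem[1];
        src
    }
    // E.g. emulate_vld2q_lane_s64::<0>(&[1, 2], ([0, 2], [2, 14])) yields
    // ([1, 2], [2, 14]), matching test_vld2q_lane_s64 above.
]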
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddb_u8() {
-        let a: u8 = 42;
-        let b: u8 = 1;
-        let e: u8 = 43;
-        let r: u8 = transmute(vqaddb_u8(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_dup_p64() {
+        let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+        let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+        let r: [i64x2; 4] = transmute(vld4q_dup_p64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddh_u16() {
-        let a: u16 = 42;
-        let b: u16 = 1;
-        let e: u16 = 43;
-        let r: u16 = transmute(vqaddh_u16(transmute(a), transmute(b)));
+    unsafe fn test_vld4_dup_f64() {
+        let a: [f64; 5] = [0., 1., 1., 1., 1.];
+        let e: [f64; 4] = [1., 1., 1., 1.];
+        let r: [f64; 4] = transmute(vld4_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadds_u32() {
-        let a: u32 = 42;
-        let b: u32 = 1;
-        let e: u32 = 43;
-        let r: u32 = transmute(vqadds_u32(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_dup_f64() {
+        let a: [f64; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.];
+        let e: [f64x2; 4] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)];
+        let r: [f64x2; 4] = transmute(vld4q_dup_f64(a[1..].as_ptr()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddd_u64() {
-        let a: u64 = 42;
-        let b: u64 = 1;
-        let e: u64 = 43;
-        let r: u64 = transmute(vqaddd_u64(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_lane_s8() {
+        let a: [i8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+        let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let r: [i8x16; 4] = transmute(vld4q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadds_s32() {
-        let a: i32 = 42;
-        let b: i32 = 1;
-        let e: i32 = 43;
-        let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
+    unsafe fn test_vld4_lane_s64() {
+        let a: [i64; 5] = [0, 1, 2, 2, 2];
+        let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+        let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+        let r: [i64x1; 4] = transmute(vld4_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddd_s64() {
-        let a: i64 = 42;
-        let b: i64 = 1;
-        let e: i64 = 43;
-        let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
+    unsafe fn test_vld4q_lane_s64() {
+        let a: [i64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+        let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+        let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+        let r: [i64x2; 4] = transmute(vld4q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f64_x2() {
-        let a: [f64; 3] = [0., 1., 2.];
-        let e: [f64; 2] = [1., 2.];
-        let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr()));
+    unsafe fn test_vld4_lane_p64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 2];
+        let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+        let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+        let r: [i64x1; 4] = transmute(vld4_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f64_x2() {
-        let a: [f64; 5] = [0., 1., 2., 3., 4.];
-        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)];
-        let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr()));
+    unsafe fn test_vld4q_lane_p64() {
+        let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+        let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+        let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+        let r: [i64x2; 4] = transmute(vld4q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f64_x3() {
-        let a: [f64; 4] = [0., 1., 2., 3.];
-        let e: [f64; 3] = [1., 2., 3.];
-        let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr()));
+    unsafe fn test_vld4q_lane_p8() {
+        let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+        let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let r: [i8x16; 4] = transmute(vld4q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f64_x3() {
-        let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
-        let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)];
-        let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr()));
+    unsafe fn test_vld4q_lane_u8() {
+        let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+        let b: [u8x16; 4] = [u8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+        let r: [u8x16; 4] = transmute(vld4q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f64_x4() {
-        let a: [f64; 5] = [0., 1., 2., 3., 4.];
-        let e: [f64; 4] = [1., 2., 3., 4.];
-        let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr()));
+    unsafe fn test_vld4_lane_u64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 2];
+        let b: [u64x1; 4] = [u64x1::new(0), u64x1::new(2), u64x1::new(2), u64x1::new(2)];
+        let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(2)];
+        let r: [u64x1; 4] = transmute(vld4_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f64_x4() {
-        let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
-        let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)];
-        let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr()));
+    unsafe fn test_vld4q_lane_u64() {
+        let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+        let b: [u64x2; 4] = [u64x2::new(0, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)];
+        let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)];
+        let r: [u64x2; 4] = transmute(vld4q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_s64() {
-        let a: [i64; 5] = [0, 1, 2, 2, 3];
-        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
-        let r: [i64x2; 2] = transmute(vld2q_s64(a[1..].as_ptr()));
+    unsafe fn test_vld4_lane_f64() {
+        let a: [f64; 5] = [0., 1., 2., 2., 2.];
+        let b: [f64; 4] = [0., 2., 2., 2.];
+        let e: [f64; 4] = [1., 2., 2., 2.];
+        let r: [f64; 4] = transmute(vld4_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_u64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 3];
-        let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 3)];
-        let r: [u64x2; 2] = transmute(vld2q_u64(a[1..].as_ptr()));
+    unsafe fn test_vld4q_lane_f64() {
+        let a: [f64; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.];
+        let b: [f64x2; 4] = [f64x2::new(0., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)];
+        let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)];
+        let r: [f64x2; 4] = transmute(vld4q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_p64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 3];
-        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
-        let r: [i64x2; 2] = transmute(vld2q_p64(a[1..].as_ptr()));
+    unsafe fn test_vst1_lane_f64() {
+        let a: [f64; 2] = [0., 1.];
+        let e: [f64; 1] = [1.];
+        let mut r: [f64; 1] = [0f64; 1];
+        vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_f64() {
+    unsafe fn test_vst1q_lane_f64() {
         let a: [f64; 3] = [0., 1., 2.];
-        let e: [f64; 2] = [1., 2.];
-        let r: [f64; 2] = transmute(vld2_f64(a[1..].as_ptr()));
+        let e: [f64; 2] = [1., 0.];
+        let mut r: [f64; 2] = [0f64; 2];
+        vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_f64() {
-        let a: [f64; 5] = [0., 1., 2., 2., 3.];
-        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 3.)];
-        let r: [f64x2; 2] = transmute(vld2q_f64(a[1..].as_ptr()));
+    unsafe fn test_vst1_f64_x2() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let mut r: [f64; 2] = [0f64; 2];
+        vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_dup_s64() {
-        let a: [i64; 5] = [0, 1, 1, 2, 3];
-        let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
-        let r: [i64x2; 2] = transmute(vld2q_dup_s64(a[1..].as_ptr()));
+    unsafe fn test_vst1q_f64_x2() {
+        let a: [f64; 5] = [0., 1., 2., 3., 4.];
+        let e: [f64; 4] = [1., 2., 3., 4.];
+        let mut r: [f64; 4] = [0f64; 4];
+        vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_dup_u64() {
-        let a: [u64; 5] = [0, 1, 1, 2, 3];
-        let e: [u64x2; 2] = [u64x2::new(1, 1), u64x2::new(1, 1)];
-        let r: [u64x2; 2] = transmute(vld2q_dup_u64(a[1..].as_ptr()));
+    unsafe fn test_vst1_f64_x3() {
+        let a: [f64; 4] = [0., 1., 2., 3.];
+        let e: [f64; 3] = [1., 2., 3.];
+        let mut r: [f64; 3] = [0f64; 3];
+        vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_dup_p64() {
-        let a: [u64; 5] = [0, 1, 1, 2, 3];
-        let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
-        let r: [i64x2; 2] = transmute(vld2q_dup_p64(a[1..].as_ptr()));
+    unsafe fn test_vst1q_f64_x3() {
+        let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
+        let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
+        let mut r: [f64; 6] = [0f64; 6];
+        vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_dup_f64() {
-        let a: [f64; 3] = [0., 1., 1.];
-        let e: [f64; 2] = [1., 1.];
-        let r: [f64; 2] = transmute(vld2_dup_f64(a[1..].as_ptr()));
+    unsafe fn test_vst1_f64_x4() {
+        let a: [f64; 5] = [0., 1., 2., 3., 4.];
+        let e: [f64; 4] = [1., 2., 3., 4.];
+        let mut r: [f64; 4] = [0f64; 4];
+        vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_dup_f64() {
-        let a: [f64; 5] = [0., 1., 1., 2., 3.];
-        let e: [f64x2; 2] = [f64x2::new(1., 1.), f64x2::new(1., 1.)];
-        let r: [f64x2; 2] = transmute(vld2q_dup_f64(a[1..].as_ptr()));
+    unsafe fn test_vst1q_f64_x4() {
+        let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
+        let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
+        let mut r: [f64; 8] = [0f64; 8];
+        vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_s8() {
-        let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [i8x16; 2] = transmute(vld2q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_s64() {
+        let a: [i64; 5] = [0, 1, 2, 2, 3];
+        let e: [i64; 4] = [1, 2, 2, 3];
+        let mut r: [i64; 4] = [0i64; 4];
+        vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
        assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_lane_s64() {
-        let a: [i64; 3] = [0, 1, 2];
-        let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
-        let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
-        let r: [i64x1; 2] = transmute(vld2_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_u64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [u64; 4] = [1, 2, 2, 3];
+        let mut r: [u64; 4] = [0u64; 4];
+        vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
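[Editor's note, not part of the generated patch: the vst2*/vst3* tests below interleave the lanes of the input vectors back into memory. The source tuple is built with `core::ptr::read_unaligned(a[1..].as_ptr().cast())`, presumably because the offset `[f64; N]` array does not guarantee the alignment of the vector-tuple type. The interleave can be sketched as (illustrative helper, not a stdarch API):

    // vst2q_s64 writes lanes alternately: v0[0], v1[0], v0[1], v1[1].
    fn emulate_vst2q_s64(out: &mut [i64; 4], v: ([i64; 2], [i64; 2])) {
        out[0] = v.0[0];
        out[1] = v.1[0];
        out[2] = v.0[1];
        out[3] = v.1[1];
    }
    // With v = ([1, 2], [2, 3]) this writes [1, 2, 2, 3], matching test_vst2q_s64.
]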
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_s64() {
-        let a: [i64; 5] = [0, 1, 2, 3, 4];
-        let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
-        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
-        let r: [i64x2; 2] = transmute(vld2q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_p64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [u64; 4] = [1, 2, 2, 3];
+        let mut r: [u64; 4] = [0u64; 4];
+        vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_lane_p64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
-        let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
-        let r: [i64x1; 2] = transmute(vld2_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2_f64() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let mut r: [f64; 2] = [0f64; 2];
+        vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_p64() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
-        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
-        let r: [i64x2; 2] = transmute(vld2q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_f64() {
+        let a: [f64; 5] = [0., 1., 2., 2., 3.];
+        let e: [f64; 4] = [1., 2., 2., 3.];
+        let mut r: [f64; 4] = [0f64; 4];
+        vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_u8() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [u8x16; 2] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [u8x16; 2] = transmute(vld2q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_lane_s8() {
+        let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+        let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+        let mut r: [i8; 32] = [0i8; 32];
+        vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_lane_u64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let b: [u64x1; 2] = [u64x1::new(0), u64x1::new(2)];
-        let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)];
-        let r: [u64x1; 2] = transmute(vld2_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2_lane_s64() {
+        let a: [i64; 3] = [0, 1, 2];
+        let e: [i64; 2] = [1, 2];
+        let mut r: [i64; 2] = [0i64; 2];
+        vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_u64() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let b: [u64x2; 2] = [u64x2::new(0, 2), u64x2::new(2, 14)];
-        let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 14)];
-        let r: [u64x2; 2] = transmute(vld2q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_lane_s64() {
+        let a: [i64; 5] = [0, 1, 2, 2, 3];
+        let e: [i64; 4] = [1, 2, 0, 0];
+        let mut r: [i64; 4] = [0i64; 4];
+        vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_p8() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [i8x16; 2] = transmute(vld2q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_lane_u8() {
+        let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+        let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+        let mut r: [u8; 32] = [0u8; 32];
+        vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2_lane_f64() {
-        let a: [f64; 3] = [0., 1., 2.];
-        let b: [f64; 2] = [0., 2.];
-        let e: [f64; 2] = [1., 2.];
-        let r: [f64; 2] = transmute(vld2_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2_lane_u64() {
+        let a: [u64; 3] = [0, 1, 2];
+        let e: [u64; 2] = [1, 2];
+        let mut r: [u64; 2] = [0u64; 2];
+        vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld2q_lane_f64() {
-        let a: [f64; 5] = [0., 1., 2., 3., 4.];
-        let b: [f64x2; 2] = [f64x2::new(0., 2.), f64x2::new(2., 14.)];
-        let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 14.)];
-        let r: [f64x2; 2] = transmute(vld2q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+    unsafe fn test_vst2q_lane_u64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [u64; 4] = [1, 2, 0, 0];
+        let mut r: [u64; 4] = [0u64; 4];
+        vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_s64() {
-        let a: [i64; 7] = [0, 1, 2, 2, 2, 4, 4];
-        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
-        let r: [i64x2; 3] = transmute(vld3q_s64(a[1..].as_ptr()));
+    unsafe fn test_vst2q_lane_p8() {
+        let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+        let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+        let mut r: [u8; 32] = [0u8; 32];
+        vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_u64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
-        let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 4), u64x2::new(2, 4)];
-        let r: [u64x2; 3] = transmute(vld3q_u64(a[1..].as_ptr()));
+    unsafe fn test_vst2_lane_p64() {
+        let a: [u64; 3] = [0, 1, 2];
+        let e: [u64; 2] = [1, 2];
+        let mut r: [u64; 2] = [0u64; 2];
+        vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_p64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
-        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
-        let r: [i64x2; 3] = transmute(vld3q_p64(a[1..].as_ptr()));
+    unsafe fn test_vst2q_lane_p64() {
+        let a: [u64; 5] = [0, 1, 2, 2, 3];
+        let e: [u64; 4] = [1, 2, 0, 0];
+        let mut r: [u64; 4] = [0u64; 4];
+        vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3_f64() {
-        let a: [f64; 4] = [0., 1., 2., 2.];
-        let e: [f64; 3] = [1., 2., 2.];
-        let r: [f64; 3] = transmute(vld3_f64(a[1..].as_ptr()));
+    unsafe fn test_vst2_lane_f64() {
+        let a: [f64; 3] = [0., 1., 2.];
+        let e: [f64; 2] = [1., 2.];
+        let mut r: [f64; 2] = [0f64; 2];
+        vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_f64() {
-        let a: [f64; 7] = [0., 1., 2., 2., 2., 4., 4.];
-        let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 4.), f64x2::new(2., 4.)];
-        let r: [f64x2; 3] = transmute(vld3q_f64(a[1..].as_ptr()));
+    unsafe fn test_vst2q_lane_f64() {
+        let a: [f64; 5] = [0., 1., 2., 2., 3.];
+        let e: [f64; 4] = [1., 2., 0., 0.];
+        let mut r: [f64; 4] = [0f64; 4];
+        vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_dup_s64() {
-        let a: [i64; 7] = [0, 1, 1, 1, 3, 1, 4];
-        let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
-        let r: [i64x2; 3] = transmute(vld3q_dup_s64(a[1..].as_ptr()));
+    unsafe fn test_vst3q_s64() {
+        let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
+        let e: [i64; 6] = [1, 2, 2, 2, 4, 4];
+        let mut r: [i64; 6] = [0i64; 6];
+        vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_dup_u64() {
-        let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
-        let e: [u64x2; 3] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)];
-        let r: [u64x2; 3] = transmute(vld3q_dup_u64(a[1..].as_ptr()));
+    unsafe fn test_vst3q_u64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+        let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
+        let mut r: [u64; 6] = [0u64; 6];
+        vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_dup_p64() {
-        let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
-        let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
-        let r: [i64x2; 3] = transmute(vld3q_dup_p64(a[1..].as_ptr()));
+    unsafe fn test_vst3q_p64() {
+        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+        let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
+        let mut r: [u64; 6] = [0u64; 6];
+        vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3_dup_f64() {
-        let a: [f64; 4] = [0., 1., 1., 1.];
-        let e: [f64; 3] = [1., 1., 1.];
-        let r: [f64; 3] = transmute(vld3_dup_f64(a[1..].as_ptr()));
+    unsafe fn test_vst3_f64() {
+        let a: [f64; 4] = [0., 1., 2., 2.];
+        let e: [f64; 3] = [1., 2., 2.];
+        let mut r: [f64; 3] = [0f64; 3];
+        vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld3q_dup_f64() {
-        let a: [f64; 7] = [0., 1., 1., 1., 3., 1., 4.];
-        let e: [f64x2; 3] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)];
-        let r: [f64x2; 3] = transmute(vld3q_dup_f64(a[1..].as_ptr()));
+    unsafe fn test_vst3q_f64() {
+        let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.];
+        let e: [f64; 6] = [1., 2.,
2., 2., 4., 4.]; + let mut r: [f64; 6] = [0f64; 6]; + vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s8() { - let a: [i8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [i8x16; 3] = transmute(vld3q_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_s8() { + let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; + let e: [i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 48] = [0i8; 48]; + vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_s64() { + unsafe fn test_vst3_lane_s64() { let a: [i64; 4] = [0, 1, 2, 2]; - let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); + let e: [i64; 3] = [1, 2, 2]; + let mut r: [i64; 3] = [0i64; 3]; + vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s64() { - let a: [i64; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let r: [i64x2; 3] = transmute(vld3q_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_s64() { + let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [i64; 6] = [1, 2, 2, 0, 0, 0]; + let mut r: [i64; 6] = [0i64; 6]; + vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_p64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_u8() { + let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; + let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 48] = [0u8; 48]; + vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_p64() { - let 
a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let r: [i64x2; 3] = transmute(vld3q_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3_lane_u64() { + let a: [u64; 4] = [0, 1, 2, 2]; + let e: [u64; 3] = [1, 2, 2]; + let mut r: [u64; 3] = [0u64; 3]; + vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_p8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [i8x16; 3] = transmute(vld3q_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_u64() { + let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [u64; 6] = [1, 2, 2, 0, 0, 0]; + let mut r: [u64; 6] = [0u64; 6]; + vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u8x16; 3] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [u8x16; 3] = transmute(vld3q_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_p8() { + let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; + let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 48] = [0u8; 48]; + vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_u64() { + unsafe fn test_vst3_lane_p64() { let a: [u64; 4] = [0, 1, 2, 2]; - let b: [u64x1; 3] = [u64x1::new(0), u64x1::new(2), u64x1::new(2)]; - let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)]; - let r: [u64x1; 3] = transmute(vld3_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); + let e: [u64; 3] = [1, 2, 2]; + let mut r: [u64; 3] = [0u64; 3]; + vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [u64x2; 3] = 
[u64x2::new(0, 2), u64x2::new(2, 14), u64x2::new(2, 16)]; - let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 14), u64x2::new(2, 16)]; - let r: [u64x2; 3] = transmute(vld3q_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_p64() { + let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [u64; 6] = [1, 2, 2, 0, 0, 0]; + let mut r: [u64; 6] = [0u64; 6]; + vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_f64() { + unsafe fn test_vst3_lane_f64() { let a: [f64; 4] = [0., 1., 2., 2.]; - let b: [f64; 3] = [0., 2., 2.]; let e: [f64; 3] = [1., 2., 2.]; - let r: [f64; 3] = transmute(vld3_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); + let mut r: [f64; 3] = [0f64; 3]; + vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_f64() { - let a: [f64; 7] = [0., 1., 2., 2., 4., 5., 6.]; - let b: [f64x2; 3] = [f64x2::new(0., 2.), f64x2::new(2., 14.), f64x2::new(9., 16.)]; - let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 14.), f64x2::new(2., 16.)]; - let r: [f64x2; 3] = transmute(vld3q_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst3q_lane_f64() { + let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.]; + let e: [f64; 6] = [1., 2., 2., 0., 0., 0.]; + let mut r: [f64; 6] = [0f64; 6]; + vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_s64() { + unsafe fn test_vst4q_s64() { let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)]; - let r: [i64x2; 4] = transmute(vld4q_s64(a[1..].as_ptr())); + let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; + let mut r: [i64; 8] = [0i64; 8]; + vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_u64() { + unsafe fn test_vst4q_u64() { let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 6), u64x2::new(2, 6), u64x2::new(6, 8)]; - let r: [u64x2; 4] = transmute(vld4q_u64(a[1..].as_ptr())); + let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; + let mut r: [u64; 8] = [0u64; 8]; + vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_p64() { + unsafe fn test_vst4q_p64() { let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)]; - let r: [i64x2; 4] = transmute(vld4q_p64(a[1..].as_ptr())); + let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; + let mut r: [u64; 8] = [0u64; 8]; + vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_f64() { + unsafe fn test_vst4_f64() { let a: [f64; 5] = [0., 1., 2., 2., 6.]; let e: [f64; 4] = [1., 2., 2., 6.]; - let r: [f64; 4] = transmute(vld4_f64(a[1..].as_ptr())); + let mut r: [f64; 4] = [0f64; 4]; + vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_f64() { + unsafe fn test_vst4q_f64() { let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; - let e: [f64x2; 4] = [f64x2::new(1., 2.), 
f64x2::new(2., 6.), f64x2::new(2., 6.), f64x2::new(6., 8.)]; - let r: [f64x2; 4] = transmute(vld4q_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_s64() { - let a: [i64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 4] = transmute(vld4q_dup_s64(a[1..].as_ptr())); + let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.]; + let mut r: [f64; 8] = [0f64; 8]; + vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_u64() { - let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [u64x2; 4] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)]; - let r: [u64x2; 4] = transmute(vld4q_dup_u64(a[1..].as_ptr())); + unsafe fn test_vst4q_lane_s8() { + let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 64] = [0i8; 64]; + vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_p64() { - let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 4] = transmute(vld4q_dup_p64(a[1..].as_ptr())); + unsafe fn test_vst4_lane_s64() { + let a: [i64; 5] = [0, 1, 2, 2, 6]; + let e: [i64; 4] = [1, 2, 2, 6]; + let mut r: [i64; 4] = [0i64; 4]; + vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_f64() { - let a: [f64; 5] = [0., 1., 1., 1., 1.]; - let e: [f64; 4] = [1., 1., 1., 1.]; - let r: [f64; 4] = transmute(vld4_dup_f64(a[1..].as_ptr())); + unsafe fn test_vst4q_lane_s64() { + let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; + let mut r: [i64; 8] = [0i64; 8]; + vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_f64() { - let a: [f64; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.]; - let e: [f64x2; 4] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)]; - let r: [f64x2; 4] = transmute(vld4q_dup_f64(a[1..].as_ptr())); + unsafe fn test_vst4q_lane_u8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 64] = [0u8; 64]; + vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s8() { - let a: [i8; 65] = [0, 1, 2, 2, 2, 
5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x16; 4] = transmute(vld4q_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4_lane_u64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [u64; 4] = [1, 2, 2, 6]; + let mut r: [u64; 4] = [0u64; 4]; + vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 2]; - let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 4] = transmute(vld4_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4q_lane_u64() { + let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; + let mut r: [u64; 8] = [0u64; 8]; + vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s64() { - let a: [i64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let r: [i64x2; 4] = transmute(vld4q_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4q_lane_p8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 64] = [0u8; 64]; + vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 2]; - let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 4] = transmute(vld4_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4_lane_p64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [u64; 4] = [1, 2, 2, 6]; + let mut r: [u64; 4] = [0u64; 4]; + vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_p64() { - let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), 
i64x2::new(2, 18)]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let r: [i64x2; 4] = transmute(vld4q_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4q_lane_p64() { + let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; + let mut r: [u64; 8] = [0u64; 8]; + vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_p8() { - let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x16; 4] = transmute(vld4q_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4_lane_f64() { + let a: [f64; 5] = [0., 1., 2., 2., 6.]; + let e: [f64; 4] = [1., 2., 2., 6.]; + let mut r: [f64; 4] = [0f64; 4]; + vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_u8() { - let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [u8x16; 4] = [u8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [u8x16; 4] = transmute(vld4q_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vst4q_lane_f64() { + let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; + let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.]; + let mut r: [f64; 8] = [0f64; 8]; + vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 2]; - let b: [u64x1; 4] = [u64x1::new(0), u64x1::new(2), u64x1::new(2), u64x1::new(2)]; - let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(2)]; - let r: [u64x1; 4] = transmute(vld4_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vmul_f64() { + let a: f64 = 1.0; + let b: f64 = 2.0; + let e: f64 = 2.0; + let r: f64 = transmute(vmul_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_u64() { - let a: [u64; 9] = [0, 1, 2, 
2, 2, 5, 6, 7, 8]; - let b: [u64x2; 4] = [u64x2::new(0, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)]; - let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)]; - let r: [u64x2; 4] = transmute(vld4q_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vmulq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(2.0, 3.0); + let e: f64x2 = f64x2::new(2.0, 6.0); + let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 2.]; - let b: [f64; 4] = [0., 2., 2., 2.]; - let e: [f64; 4] = [1., 2., 2., 2.]; - let r: [f64; 4] = transmute(vld4_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vmul_n_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: f64 = 2.; + let r: f64 = transmute(vmul_n_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_f64() { - let a: [f64; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.]; - let b: [f64x2; 4] = [f64x2::new(0., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)]; - let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)]; - let r: [f64x2; 4] = transmute(vld4q_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); + unsafe fn test_vmulq_n_f64() { + let a: f64x2 = f64x2::new(1., 2.); + let b: f64 = 2.; + let e: f64x2 = f64x2::new(2., 4.); + let r: f64x2 = transmute(vmulq_n_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1_lane_f64() { - let a: [f64; 2] = [0., 1.]; - let e: [f64; 1] = [1.]; - let mut r: [f64; 1] = [0f64; 1]; - vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmul_lane_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: f64 = 2.; + let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1q_lane_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 0.]; - let mut r: [f64; 2] = [0f64; 2]; - vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmul_laneq_f64() { + let a: f64 = 1.; + let b: f64x2 = f64x2::new(2., 0.); + let e: f64 = 2.; + let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x2() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let mut r: [f64; 2] = [0f64; 2]; - vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmulq_lane_f64() { + let a: f64x2 = f64x2::new(1., 2.); + let b: f64 = 2.; + let e: f64x2 = f64x2::new(2., 4.); + let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x2() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64; 4] = [1., 2., 3., 4.]; - let mut r: [f64; 4] = [0f64; 4]; - vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmulq_laneq_f64() { + let a: f64x2 = f64x2::new(1., 2.); + let b: f64x2 = f64x2::new(2., 0.); + let e: f64x2 = f64x2::new(2., 4.); + let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x3() { - let a: 
[f64; 4] = [0., 1., 2., 3.]; - let e: [f64; 3] = [1., 2., 3.]; - let mut r: [f64; 3] = [0f64; 3]; - vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmuls_lane_f32() { + let a: f32 = 1.; + let b: f32x2 = f32x2::new(2., 0.); + let e: f32 = 2.; + let r: f32 = transmute(vmuls_lane_f32::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x3() { - let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.]; - let e: [f64; 6] = [1., 2., 3., 4., 5., 6.]; - let mut r: [f64; 6] = [0f64; 6]; - vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmuls_laneq_f32() { + let a: f32 = 1.; + let b: f32x4 = f32x4::new(2., 0., 0., 0.); + let e: f32 = 2.; + let r: f32 = transmute(vmuls_laneq_f32::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x4() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64; 4] = [1., 2., 3., 4.]; - let mut r: [f64; 4] = [0f64; 4]; - vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmuld_lane_f64() { + let a: f64 = 1.; + let b: f64 = 2.; + let e: f64 = 2.; + let r: f64 = transmute(vmuld_lane_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x4() { - let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; - let mut r: [f64; 8] = [0f64; 8]; - vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmuld_laneq_f64() { + let a: f64 = 1.; + let b: f64x2 = f64x2::new(2., 0.); + let e: f64 = 2.; + let r: f64 = transmute(vmuld_laneq_f64::<0>(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 3]; - let e: [i64; 4] = [1, 2, 2, 3]; - let mut r: [i64; 4] = [0i64; 4]; - vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_s8() { + let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: i16x8 = i16x8::new(9, 20, 11, 24, 13, 28, 15, 32); + let r: i16x8 = transmute(vmull_high_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 2, 3]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_s16() { + let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12); + let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i32x4 = i32x4::new(9, 20, 11, 24); + let r: i32x4 = transmute(vmull_high_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 2, 3]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_s32() { + let a: i32x4 = i32x4::new(1, 2, 9, 10); + let b: i32x4 = i32x4::new(1, 2, 1, 2); + let e: i64x2 = i64x2::new(9, 20); + let r: i64x2 = transmute(vmull_high_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: 
[f64; 2] = [1., 2.]; - let mut r: [f64; 2] = [0f64; 2]; - vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_u8() { + let a: u8x16 = u8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: u16x8 = u16x8::new(9, 20, 11, 24, 13, 28, 15, 32); + let r: u16x8 = transmute(vmull_high_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 3.]; - let e: [f64; 4] = [1., 2., 2., 3.]; - let mut r: [f64; 4] = [0f64; 4]; - vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_u16() { + let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12); + let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u32x4 = u32x4::new(9, 20, 11, 24); + let r: u32x4 = transmute(vmull_high_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_s8() { - let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; - let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [i8; 32] = [0i8; 32]; - vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_u32() { + let a: u32x4 = u32x4::new(1, 2, 9, 10); + let b: u32x4 = u32x4::new(1, 2, 1, 2); + let e: u64x2 = u64x2::new(9, 20); + let r: u64x2 = transmute(vmull_high_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_s64() { - let a: [i64; 3] = [0, 1, 2]; - let e: [i64; 2] = [1, 2]; - let mut r: [i64; 2] = [0i64; 2]; - vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_p64() { + let a: p64 = 15; + let b: p64 = 3; + let e: p128 = 17; + let r: p128 = transmute(vmull_p64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 3]; - let e: [i64; 4] = [1, 2, 0, 0]; - let mut r: [i64; 4] = [0i64; 4]; - vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_p8() { + let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3); + let e: i16x8 = i16x8::new(9, 30, 11, 20, 13, 18, 15, 48); + let r: i16x8 = transmute(vmull_high_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_u8() { - let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; - let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 32] = [0u8; 32]; - vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast())); + unsafe fn test_vmull_high_p64() { + let a: i64x2 = i64x2::new(1, 15); + let b: i64x2 = i64x2::new(1, 3); + let e: p128 = 17; + let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_u64() { - let a: [u64; 3] = [0, 1, 2]; - let e: [u64; 2] = [1, 2]; - let mut r: [u64; 2] = [0u64; 2]; 
-        vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_n_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: i16 = 2;
+        let e: i32x4 = i32x4::new(18, 20, 22, 24);
+        let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_u64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 3];
-        let e: [u64; 4] = [1, 2, 0, 0];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_n_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 9, 10);
+        let b: i32 = 2;
+        let e: i64x2 = i64x2::new(18, 20);
+        let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_p8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
-        let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_n_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: u16 = 2;
+        let e: u32x4 = u32x4::new(18, 20, 22, 24);
+        let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_p64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 2];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_n_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 9, 10);
+        let b: u32 = 2;
+        let e: u64x2 = u64x2::new(18, 20);
+        let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_p64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 3];
-        let e: [u64; 4] = [1, 2, 0, 0];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_lane_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: i16x4 = i16x4::new(0, 2, 0, 0);
+        let e: i32x4 = i32x4::new(18, 20, 22, 24);
+        let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_f64() {
-        let a: [f64; 3] = [0., 1., 2.];
-        let e: [f64; 2] = [1., 2.];
-        let mut r: [f64; 2] = [0f64; 2];
-        vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_laneq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: i32x4 = i32x4::new(18, 20, 22, 24);
+        let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_f64() {
-        let a: [f64; 5] = [0., 1., 2., 2., 3.];
-        let e: [f64; 4] = [1., 2., 0., 0.];
-        let mut r: [f64; 4] = [0f64; 4];
-        vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_lane_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 9, 10);
+        let b: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(18, 20);
+        let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_s64() {
-        let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [i64; 6] = [1, 2, 2, 2, 4, 4];
-        let mut r: [i64; 6] = [0i64; 6];
-        vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_laneq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 9, 10);
+        let b: i32x4 = i32x4::new(0, 2, 0, 0);
+        let e: i64x2 = i64x2::new(18, 20);
+        let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_u64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmull_high_lane_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: u16x4 = u16x4::new(0, 2, 0, 0);
+        let e: u32x4 = u32x4::new(18, 20, 22, 24);
+        let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_p64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_laneq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+        let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: u32x4 = u32x4::new(18, 20, 22, 24);
+        let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_f64() {
-        let a: [f64; 4] = [0., 1., 2., 2.];
-        let e: [f64; 3] = [1., 2., 2.];
-        let mut r: [f64; 3] = [0f64; 3];
-        vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_lane_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 9, 10);
+        let b: u32x2 = u32x2::new(0, 2);
+        let e: u64x2 = u64x2::new(18, 20);
+        let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_f64() {
-        let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.];
-        let e: [f64; 6] = [1., 2., 2., 2., 4., 4.];
-        let mut r: [f64; 6] = [0f64; 6];
-        vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmull_high_laneq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 9, 10);
+        let b: u32x4 = u32x4::new(0, 2, 0, 0);
+        let e: u64x2 = u64x2::new(18, 20);
+        let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_s8() {
-        let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 48] = [0i8; 48];
-        vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let e: f32x2 = f32x2::new(2., 4.);
+        let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_s64() {
-        let a: [i64; 4] = [0, 1, 2, 2];
-        let e: [i64; 3] = [1, 2, 2];
-        let mut r: [i64; 3] = [0i64; 3];
-        vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+        let r: f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_s64() {
-        let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [i64; 6] = [1, 2, 2, 0, 0, 0];
-        let mut r: [i64; 6] = [0i64; 6];
-        vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_f64() {
+        let a: f64 = 1.;
+        let b: f64 = 2.;
+        let e: f64 = 2.;
+        let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_u8() {
-        let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64x2 = f64x2::new(2., 2.);
+        let e: f64x2 = f64x2::new(2., 4.);
+        let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_u64() {
-        let a: [u64; 4] = [0, 1, 2, 2];
-        let e: [u64; 3] = [1, 2, 2];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_lane_f64() {
+        let a: f64 = 1.;
+        let b: f64 = 2.;
+        let e: f64 = 2.;
+        let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_u64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_laneq_f64() {
+        let a: f64 = 1.;
+        let b: f64x2 = f64x2::new(2., 0.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_p8() {
-        let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_lane_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x2 = f32x2::new(2., 0.);
+        let e: f32x2 = f32x2::new(2., 4.);
+        let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_p64() {
-        let a: [u64; 4] = [0, 1, 2, 2];
-        let e: [u64; 3] = [1, 2, 2];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulx_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x2 = f32x2::new(2., 4.);
+        let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_p64() {
-        let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_lane_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x2 = f32x2::new(2., 0.);
+        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+        let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_f64() {
-        let a: [f64; 4] = [0., 1., 2., 2.];
-        let e: [f64; 3] = [1., 2., 2.];
-        let mut r: [f64; 3] = [0f64; 3];
-        vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+        let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_f64() {
-        let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.];
-        let e: [f64; 6] = [1., 2., 2., 0., 0., 0.];
-        let mut r: [f64; 6] = [0f64; 6];
-        vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_lane_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64 = 2.;
+        let e: f64x2 = f64x2::new(2., 4.);
+        let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_s64() {
-        let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
-        let mut r: [i64; 8] = [0i64; 8];
-        vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxq_laneq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64x2 = f64x2::new(2., 0.);
+        let e: f64x2 = f64x2::new(2., 4.);
+        let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_u64() {
-        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxs_f32() {
+        let a: f32 = 2.;
+        let b: f32 = 3.;
+        let e: f32 = 6.;
+        let r: f32 = transmute(vmulxs_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_p64() {
-        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxd_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 3.;
+        let e: f64 = 6.;
+        let r: f64 = transmute(vmulxd_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_f64() {
-        let a: [f64; 5] = [0., 1., 2., 2., 6.];
-        let e: [f64; 4] = [1., 2., 2., 6.];
-        let mut r: [f64; 4] = [0f64; 4];
-        vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxs_lane_f32() {
+        let a: f32 = 2.;
+        let b: f32x2 = f32x2::new(3., 0.);
+        let e: f32 = 6.;
+        let r: f32 = transmute(vmulxs_lane_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_f64() {
-        let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
-        let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.];
-        let mut r: [f64; 8] = [0f64; 8];
-        vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxs_laneq_f32() {
+        let a: f32 = 2.;
+        let b: f32x4 = f32x4::new(3., 0., 0., 0.);
+        let e: f32 = 6.;
+        let r: f32 = transmute(vmulxs_laneq_f32::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_s8() {
-        let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 64] = [0i8; 64];
-        vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxd_lane_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 3.;
+        let e: f64 = 6.;
+        let r: f64 = transmute(vmulxd_lane_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_s64() {
-        let a: [i64; 5] = [0, 1, 2, 2, 6];
-        let e: [i64; 4] = [1, 2, 2, 6];
-        let mut r: [i64; 4] = [0i64; 4];
-        vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vmulxd_laneq_f64() {
+        let a: f64 = 2.;
+        let b: f64x2 = f64x2::new(3., 0.);
+        let e: f64 = 6.;
+        let r: f64 = transmute(vmulxd_laneq_f64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_s64() {
-        let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
-        let mut r: [i64; 8] = [0i64; 8];
-        vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfma_f64() {
+        let a: f64 = 8.0;
+        let b: f64 = 6.0;
+        let c: f64 = 2.0;
+        let e: f64 = 20.0;
+        let r: f64 = transmute(vfma_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_u8() {
-        let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfmaq_f64() {
+        let a: f64x2 = f64x2::new(8.0, 18.0);
+        let b: f64x2 = f64x2::new(6.0, 4.0);
+        let c: f64x2 = f64x2::new(2.0, 3.0);
+        let e: f64x2 = f64x2::new(20.0, 30.0);
+        let r: f64x2 = transmute(vfmaq_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_u64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 6];
-        let e: [u64; 4] = [1, 2, 2, 6];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfma_n_f64() {
+        let a: f64 = 2.0;
+        let b: f64 = 6.0;
+        let c: f64 = 8.0;
+        let e: f64 = 50.0;
+        let r: f64 = transmute(vfma_n_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_u64() {
-        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfmaq_n_f64() {
+        let a: f64x2 = f64x2::new(2.0, 3.0);
+        let b: f64x2 = f64x2::new(6.0, 4.0);
+        let c: f64 = 8.0;
+        let e: f64x2 = f64x2::new(50.0, 35.0);
+        let r: f64x2 = transmute(vfmaq_n_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_p8() {
-        let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfma_lane_f32() {
+        let a: f32x2 = f32x2::new(2., 3.);
+        let b: f32x2 = f32x2::new(6., 4.);
+        let c: f32x2 = f32x2::new(2., 0.);
+        let e: f32x2 = f32x2::new(14., 11.);
+        let r: f32x2 = transmute(vfma_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_p64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 6];
-        let e: [u64; 4] = [1, 2, 2, 6];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfma_laneq_f32() {
+        let a: f32x2 = f32x2::new(2., 3.);
+        let b: f32x2 = f32x2::new(6., 4.);
+        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x2 = f32x2::new(14., 11.);
+        let r: f32x2 = transmute(vfma_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_p64() {
-        let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfmaq_lane_f32() {
+        let a: f32x4 = f32x4::new(2., 3., 4., 5.);
+        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+        let c: f32x2 = f32x2::new(2., 0.);
+        let e: f32x4 = f32x4::new(14., 11., 18., 21.);
+        let r: f32x4 = transmute(vfmaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_f64() {
-        let a: [f64; 5] = [0., 1., 2., 2., 6.];
-        let e: [f64; 4] = [1., 2., 2., 6.];
-        let mut r: [f64; 4] = [0f64; 4];
-        vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfmaq_laneq_f32() {
+        let a: f32x4 = f32x4::new(2., 3., 4., 5.);
+        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x4 = f32x4::new(14., 11., 18., 21.);
+        let r: f32x4 = transmute(vfmaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_f64() {
-        let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
-        let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.];
-        let mut r: [f64; 8] = [0f64; 8];
-        vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
+    unsafe fn test_vfma_lane_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 6.;
+        let c: f64 = 2.;
+        let e: f64 = 14.;
+        let r: f64 = transmute(vfma_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 2.0;
-        let e: f64 = 2.0;
-        let r: f64 = transmute(vmul_f64(transmute(a), transmute(b)));
+    unsafe fn test_vfma_laneq_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 6.;
+        let c: f64x2 = f64x2::new(2., 0.);
+        let e: f64 = 14.;
+        let r: f64 = transmute(vfma_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let b: f64x2 = f64x2::new(2.0, 3.0);
-        let e: f64x2 = f64x2::new(2.0, 6.0);
-        let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vfmaq_lane_f64() {
+        let a: f64x2 = f64x2::new(2., 3.);
+        let b: f64x2 = f64x2::new(6., 4.);
+        let c: f64 = 2.;
+        let e: f64x2 = f64x2::new(14., 11.);
+        let r: f64x2 = transmute(vfmaq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmul_n_f64(transmute(a), transmute(b)));
+    unsafe fn test_vfmaq_laneq_f64() {
+        let a: f64x2 = f64x2::new(2., 3.);
+        let b: f64x2 = f64x2::new(6., 4.);
+        let c: f64x2 = f64x2::new(2., 0.);
+        let e: f64x2 = f64x2::new(14., 11.);
+        let r: f64x2 = transmute(vfmaq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64 = 2.;
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulq_n_f64(transmute(a), transmute(b)));
+    unsafe fn test_vfmas_lane_f32() {
+        let a: f32 = 2.;
+        let b: f32 = 6.;
+        let c: f32x2 = f32x2::new(3., 0.);
+        let e: f32 = 20.;
+        let r: f32 = transmute(vfmas_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfmas_laneq_f32() {
+        let a: f32 = 2.;
+        let b: f32 = 6.;
+        let c: f32x4 = f32x4::new(3., 0., 0., 0.);
+        let e: f32 = 20.;
+        let r: f32 = transmute(vfmas_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }

     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_f64() {
-        let a: f64 = 1.;
-        let b: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfmad_lane_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 6.;
+        let c: f64 = 3.;
+        let e: f64 = 20.;
+        let r: f64 = transmute(vfmad_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64 = 2.;
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfmad_laneq_f64() {
+        let a: f64 = 2.;
+        let b: f64 = 6.;
+        let c: f64x2 = f64x2::new(3., 0.);
+        let e: f64 = 20.;
+        let r: f64 = transmute(vfmad_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64x2 = f64x2::new(2., 0.);
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfms_f64() {
+        let a: f64 = 20.0;
+        let b: f64 = 6.0;
+        let c: f64 = 2.0;
+        let e: f64 = 8.0;
+        let r: f64 = transmute(vfms_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmuls_lane_f32() {
-        let a: f32 = 1.;
-        let b: f32x2 = f32x2::new(2., 0.);
-        let e: f32 = 2.;
-        let r: f32 = transmute(vmuls_lane_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_f64() {
+        let a: f64x2 = f64x2::new(20.0, 30.0);
+        let b: f64x2 = f64x2::new(6.0, 4.0);
+        let c: f64x2 = f64x2::new(2.0, 3.0);
+        let e: f64x2 = f64x2::new(8.0, 18.0);
+        let r: f64x2 = transmute(vfmsq_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmuls_laneq_f32() {
-        let a: f32 = 1.;
-        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32 = 2.;
-        let r: f32 = transmute(vmuls_laneq_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfms_n_f64() {
+        let a: f64 = 50.0;
+        let b: f64 = 6.0;
+        let c: f64 = 8.0;
+        let e: f64 = 2.0;
+        let r: f64 = transmute(vfms_n_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmuld_lane_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmuld_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_n_f64() {
+        let a: f64x2 = f64x2::new(50.0, 35.0);
+        let b: f64x2 = f64x2::new(6.0, 4.0);
+        let c: f64 = 8.0;
+        let e: f64x2 = f64x2::new(2.0, 3.0);
+        let r: f64x2 = transmute(vfmsq_n_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmuld_laneq_f64() {
-        let a: f64 = 1.;
-        let b: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmuld_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vfms_lane_f32() {
+        let a: f32x2 = f32x2::new(14., 11.);
+        let b: f32x2 = f32x2::new(6., 4.);
+        let c: f32x2 = f32x2::new(2., 0.);
+        let e: f32x2 = f32x2::new(2., 3.);
+        let r: f32x2 = transmute(vfms_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i16x8 = i16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
-        let r: i16x8 = transmute(vmull_high_s8(transmute(a), transmute(b)));
+    unsafe fn test_vfms_laneq_f32() {
+        let a: f32x2 = f32x2::new(14., 11.);
+        let b: f32x2 = f32x2::new(6., 4.);
+        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x2 = f32x2::new(2., 3.);
+        let r: f32x2 = transmute(vfms_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
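+
+    // The vfms* tests mirror the vfma* ones above: vfms computes the fused
+    // `a - b * c`, so e.g. 14.0 - 6.0 * 2.0 == 2.0 with lane 0 of `c`.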
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i32x4 = i32x4::new(9, 20, 11, 24);
-        let r: i32x4 = transmute(vmull_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_lane_f32() {
+        let a: f32x4 = f32x4::new(14., 11., 18., 21.);
+        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+        let c: f32x2 = f32x2::new(2., 0.);
+        let e: f32x4 = f32x4::new(2., 3., 4., 5.);
+        let r: f32x4 = transmute(vfmsq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 9, 10);
-        let b: i32x4 = i32x4::new(1, 2, 1, 2);
-        let e: i64x2 = i64x2::new(9, 20);
-        let r: i64x2 = transmute(vmull_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_laneq_f32() {
+        let a: f32x4 = f32x4::new(14., 11., 18., 21.);
+        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32x4 = f32x4::new(2., 3., 4., 5.);
+        let r: f32x4 = transmute(vfmsq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u16x8 = u16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
-        let r: u16x8 = transmute(vmull_high_u8(transmute(a), transmute(b)));
+    unsafe fn test_vfms_lane_f64() {
+        let a: f64 = 14.;
+        let b: f64 = 6.;
+        let c: f64 = 2.;
+        let e: f64 = 2.;
+        let r: f64 = transmute(vfms_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u32x4 = u32x4::new(9, 20, 11, 24);
-        let r: u32x4 = transmute(vmull_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vfms_laneq_f64() {
+        let a: f64 = 14.;
+        let b: f64 = 6.;
+        let c: f64x2 = f64x2::new(2., 0.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vfms_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 9, 10);
-        let b: u32x4 = u32x4::new(1, 2, 1, 2);
-        let e: u64x2 = u64x2::new(9, 20);
-        let r: u64x2 = transmute(vmull_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_lane_f64() {
+        let a: f64x2 = f64x2::new(14., 11.);
+        let b: f64x2 = f64x2::new(6., 4.);
+        let c: f64 = 2.;
+        let e: f64x2 = f64x2::new(2., 3.);
+        let r: f64x2 = transmute(vfmsq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_p64() {
-        let a: p64 = 15;
-        let b: p64 = 3;
-        let e: p128 = 17;
-        let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
+    unsafe fn test_vfmsq_laneq_f64() {
+        let a: f64x2 = f64x2::new(14., 11.);
+        let b: f64x2 = f64x2::new(6., 4.);
+        let c: f64x2 = f64x2::new(2., 0.);
+        let e: f64x2 = f64x2::new(2., 3.);
+        let r: f64x2 = transmute(vfmsq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_p8() {
-        let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
-        let e: i16x8 = i16x8::new(9, 30, 11, 20, 13, 18, 15, 48);
-        let r: i16x8 = transmute(vmull_high_p8(transmute(a), transmute(b)));
+    unsafe fn test_vfmss_lane_f32() {
+        let a: f32 = 14.;
+        let b: f32 = 6.;
+        let c: f32x2 = f32x2::new(2., 0.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vfmss_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_p64() {
-        let a: i64x2 = i64x2::new(1, 15);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: p128 = 17;
-        let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
+    unsafe fn test_vfmss_laneq_f32() {
+        let a: f32 = 14.;
+        let b: f32 = 6.;
+        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vfmss_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_n_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: i16 = 2;
-        let e: i32x4 = i32x4::new(18, 20, 22, 24);
-        let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), transmute(b)));
+    unsafe fn test_vfmsd_lane_f64() {
+        let a: f64 = 14.;
+        let b: f64 = 6.;
+        let c: f64 = 2.;
+        let e: f64 = 2.;
+        let r: f64 = transmute(vfmsd_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_n_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 9, 10);
-        let b: i32 = 2;
-        let e: i64x2 = i64x2::new(18, 20);
-        let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), transmute(b)));
+    unsafe fn test_vfmsd_laneq_f64() {
+        let a: f64 = 14.;
+        let b: f64 = 6.;
+        let c: f64x2 = f64x2::new(2., 0.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vfmsd_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_n_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: u16 = 2;
-        let e: u32x4 = u32x4::new(18, 20, 22, 24);
-        let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdiv_f32() {
+        let a: f32x2 = f32x2::new(2.0, 6.0);
+        let b: f32x2 = f32x2::new(1.0, 2.0);
+        let e: f32x2 = f32x2::new(2.0, 3.0);
+        let r: f32x2 = transmute(vdiv_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_n_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 9, 10);
-        let b: u32 = 2;
-        let e: u64x2 = u64x2::new(18, 20);
-        let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdivq_f32() {
+        let a: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0);
+        let b: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0);
+        let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+        let r: f32x4 = transmute(vdivq_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_lane_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(18, 20, 22, 24);
-        let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vdiv_f64() {
+        let a: f64 = 2.0;
+        let b: f64 = 1.0;
+        let e: f64 = 2.0;
+        let r: f64 = transmute(vdiv_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_laneq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(18, 20, 22, 24);
-        let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vdivq_f64() {
+        let a: f64x2 = f64x2::new(2.0, 6.0);
+        let b: f64x2 = f64x2::new(1.0, 2.0);
+        let e: f64x2 = f64x2::new(2.0, 3.0);
+        let r: f64x2 = transmute(vdivq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_lane_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 9, 10);
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(18, 20);
-        let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vsub_f64() {
+        let a: f64 = 1.0;
+        let b: f64 = 1.0;
+        let e: f64 = 0.0;
+        let r: f64 = transmute(vsub_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 9, 10);
-        let b: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(18, 20);
-        let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vsubq_f64() {
+        let a: f64x2 = f64x2::new(1.0, 4.0);
+        let b: f64x2 = f64x2::new(1.0, 2.0);
+        let e: f64x2 = f64x2::new(0.0, 2.0);
+        let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_lane_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u32x4 = u32x4::new(18, 20, 22, 24);
-        let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vsubd_s64() {
+        let a: i64 = 3;
+        let b: i64 = 2;
+        let e: i64 = 1;
+        let r: i64 = transmute(vsubd_s64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_laneq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
-        let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u32x4 = u32x4::new(18, 20, 22, 24);
-        let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vsubd_u64() {
+        let a: u64 = 3;
+        let b: u64 = 2;
+        let e: u64 = 1;
+        let r: u64 = transmute(vsubd_u64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_lane_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 9, 10);
-        let b: u32x2 = u32x2::new(0, 2);
-        let e: u64x2 = u64x2::new(18, 20);
-        let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vaddd_s64() {
+        let a: i64 = 1;
+        let b: i64 = 2;
+        let e: i64 = 3;
+        let r: i64 = transmute(vaddd_s64(transmute(a), transmute(b)));
        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_high_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 9, 10);
-        let b: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u64x2 = u64x2::new(18, 20);
-        let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b)));
+    unsafe fn test_vaddd_u64() {
+        let a: u64 = 1;
+        let b: u64 = 2;
+        let e: u64 = 3;
+        let r: u64 = transmute(vaddd_u64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_f32() {
+    unsafe fn test_vaddv_f32() {
         let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x2 = f32x2::new(2., 2.);
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b)));
+        let e: f32 = 3.;
+        let r: f32 = transmute(vaddv_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vaddvq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 0., 0.);
+        let e: f32 = 3.;
+        let r: f32 = transmute(vaddvq_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b)));
+    unsafe fn test_vaddvq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 3.;
+        let r: f64 = transmute(vaddvq_f64(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64x2 = f64x2::new(2., 2.);
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vaddlv_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i32 = 10;
+        let r: i32 = transmute(vaddlv_s16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_lane_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlvq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32 = 36;
+        let r: i32 = transmute(vaddlvq_s16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_laneq_f64() {
-        let a: f64 = 1.;
-        let b: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 2.;
-        let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlv_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let e: i64 = 3;
+        let r: i64 = transmute(vaddlv_s32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_lane_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x2 = f32x2::new(2., 0.);
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlvq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i64 = 10;
+        let r: i64 = transmute(vaddlvq_s32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulx_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlv_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let e: u32 = 10;
+        let r: u32 = transmute(vaddlv_u16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_lane_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x2 = f32x2::new(2., 0.);
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlvq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32 = 36;
+        let r: u32 = transmute(vaddlvq_u16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlv_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let e: u64 = 3;
+        let r: u64 = transmute(vaddlv_u32(transmute(a)));
         assert_eq!(r, e);
     }
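+
+    // vaddv*/vaddlv* reduce every lane of a vector to one scalar sum; the `l`
+    // (long) variants widen each element before adding, e.g. i16x4 { 1, 2, 3, 4 }
+    // sums to 10i32.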
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_lane_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64 = 2.;
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vaddlvq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u64 = 10;
+        let r: u64 = transmute(vaddlvq_u32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxq_laneq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64x2 = f64x2::new(2., 0.);
-        let e: f64x2 = f64x2::new(2., 4.);
-        let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_s8() {
+        let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxs_f32() {
-        let a: f32 = 2.;
-        let b: f32 = 3.;
-        let e: f32 = 6.;
-        let r: f32 = transmute(vmulxs_f32(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_s16() {
+        let a: i32x4 = i32x4::new(8, 9, 10, 11);
+        let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxd_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 3.;
-        let e: f64 = 6.;
-        let r: f64 = transmute(vmulxd_f64(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_s32() {
+        let a: i64x2 = i64x2::new(8, 9);
+        let b: i32x4 = i32x4::new(6, 7, 8, 9);
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxs_lane_f32() {
-        let a: f32 = 2.;
-        let b: f32x2 = f32x2::new(3., 0.);
-        let e: f32 = 6.;
-        let r: f32 = transmute(vmulxs_lane_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_u8() {
+        let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+        let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxs_laneq_f32() {
-        let a: f32 = 2.;
-        let b: f32x4 = f32x4::new(3., 0., 0., 0.);
-        let e: f32 = 6.;
-        let r: f32 = transmute(vmulxs_laneq_f32::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_u16() {
+        let a: u32x4 = u32x4::new(8, 9, 10, 11);
+        let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxd_lane_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 3.;
-        let e: f64 = 6.;
-        let r: f64 = transmute(vmulxd_lane_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vsubw_high_u32() {
+        let a: u64x2 = u64x2::new(8, 9);
+        let b: u32x4 = u32x4::new(6, 7, 8, 9);
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulxd_laneq_f64() {
-        let a: f64 = 2.;
-        let b: f64x2 = f64x2::new(3., 0.);
-        let e: f64 = 6.;
-        let r: f64 = transmute(vmulxd_laneq_f64::<0>(transmute(a), transmute(b)));
+    unsafe fn test_vsubl_high_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_f64() {
-        let a: f64 = 8.0;
-        let b: f64 = 6.0;
-        let c: f64 = 2.0;
-        let e: f64 = 20.0;
-        let r: f64 = transmute(vfma_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vsubl_high_s16() {
+        let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
+        let e: i32x4 = i32x4::new(4, 5, 6, 7);
+        let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_f64() {
-        let a: f64x2 = f64x2::new(8.0, 18.0);
-        let b: f64x2 = f64x2::new(6.0, 4.0);
-        let c: f64x2 = f64x2::new(2.0, 3.0);
-        let e: f64x2 = f64x2::new(20.0, 30.0);
-        let r: f64x2 = transmute(vfmaq_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vsubl_high_s32() {
+        let a: i32x4 = i32x4::new(12, 13, 14, 15);
+        let b: i32x4 = i32x4::new(6, 6, 8, 8);
+        let e: i64x2 = i64x2::new(6, 7);
+        let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_n_f64() {
-        let a: f64 = 2.0;
-        let b: f64 = 6.0;
-        let c: f64 = 8.0;
-        let e: f64 = 50.0;
-        let r: f64 = transmute(vfma_n_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vsubl_high_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_n_f64() {
-        let a: f64x2 = f64x2::new(2.0, 3.0);
-        let b: f64x2 = f64x2::new(6.0, 4.0);
-        let c: f64 = 8.0;
-        let e: f64x2 = f64x2::new(50.0, 35.0);
-        let r: f64x2 = transmute(vfmaq_n_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vsubl_high_u16() {
+        let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+        let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
+        let e: u32x4 = u32x4::new(4, 5, 6, 7);
+        let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_lane_f32() {
-        let a: f32x2 = f32x2::new(2., 3.);
-        let b: f32x2 = f32x2::new(6., 4.);
-        let c: f32x2 = f32x2::new(2., 0.);
-        let e: f32x2 = f32x2::new(14., 11.);
-        let r: f32x2 = transmute(vfma_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vsubl_high_u32() {
+        let a: u32x4 = u32x4::new(12, 13, 14, 15);
+        let b: u32x4 = u32x4::new(6, 6, 8, 8);
+        let e: u64x2 = u64x2::new(6, 7);
+        let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_laneq_f32() {
-        let a: f32x2 = f32x2::new(2., 3.);
-        let b: f32x2 = f32x2::new(6., 4.);
-        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x2 = f32x2::new(14., 11.);
-        let r: f32x2 = transmute(vfma_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_s8() {
+        let a: i8x16 = i8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
+        let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let c: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let e: i8x16 = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        let r: i8x16 = transmute(vbcaxq_s8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_lane_f32() {
-        let a: f32x4 = f32x4::new(2., 3., 4., 5.);
-        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
-        let c: f32x2 = f32x2::new(2., 0.);
-        let e: f32x4 = f32x4::new(14., 11., 18., 21.);
-        let r: f32x4 = transmute(vfmaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_s16() {
+        let a: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let c: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let e: i16x8 = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let r: i16x8 = transmute(vbcaxq_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_laneq_f32() {
-        let a: f32x4 = f32x4::new(2., 3., 4., 5.);
-        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
-        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x4 = f32x4::new(14., 11., 18., 21.);
-        let r: f32x4 = transmute(vfmaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_s32() {
+        let a: i32x4 = i32x4::new(1, 0, 1, 0);
+        let b: i32x4 = i32x4::new(0, 1, 2, 3);
+        let c: i32x4 = i32x4::new(1, 1, 1, 1);
+        let e: i32x4 = i32x4::new(1, 0, 3, 2);
+        let r: i32x4 = transmute(vbcaxq_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_lane_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 6.;
-        let c: f64 = 2.;
-        let e: f64 = 14.;
-        let r: f64 = transmute(vfma_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_s64() {
+        let a: i64x2 = i64x2::new(1, 0);
+        let b: i64x2 = i64x2::new(0, 1);
+        let c: i64x2 = i64x2::new(1, 1);
+        let e: i64x2 = i64x2::new(1, 0);
+        let r: i64x2 = transmute(vbcaxq_s64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfma_laneq_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 6.;
-        let c: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 14.;
-        let r: f64 = transmute(vfma_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_u8() {
+        let a: u8x16 = u8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
+        let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let c: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let e: u8x16 = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        let r: u8x16 = transmute(vbcaxq_u8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_lane_f64() {
-        let a: f64x2 = f64x2::new(2., 3.);
-        let b: f64x2 = f64x2::new(6., 4.);
-        let c: f64 = 2.;
-        let e: f64x2 = f64x2::new(14., 11.);
-        let r: f64x2 = transmute(vfmaq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_u16() {
+        let a: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let c: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let e: u16x8 = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let r: u16x8 = transmute(vbcaxq_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmaq_laneq_f64() {
-        let a: f64x2 = f64x2::new(2., 3.);
-        let b: f64x2 = f64x2::new(6., 4.);
-        let c: f64x2 = f64x2::new(2., 0.);
-        let e: f64x2 = f64x2::new(14., 11.);
-        let r: f64x2 = transmute(vfmaq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_u32() {
+        let a: u32x4 = u32x4::new(1, 0, 1, 0);
+        let b: u32x4 = u32x4::new(0, 1, 2, 3);
+        let c: u32x4 = u32x4::new(1, 1, 1, 1);
+        let e: u32x4 = u32x4::new(1, 0, 3, 2);
+        let r: u32x4 = transmute(vbcaxq_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmas_lane_f32() {
-        let a: f32 = 2.;
-        let b: f32 = 6.;
-        let c: f32x2 = f32x2::new(3., 0.);
-        let e: f32 = 20.;
-        let r: f32 = transmute(vfmas_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vbcaxq_u64() {
+        let a: u64x2 = u64x2::new(1, 0);
+        let b: u64x2 = u64x2::new(0, 1);
+        let c: u64x2 = u64x2::new(1, 1);
+        let e: u64x2 = u64x2::new(1, 0);
+        let r: u64x2 = transmute(vbcaxq_u64(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmas_laneq_f32() {
-        let a: f32 = 2.;
-        let b: f32 = 6.;
-        let c: f32x4 = f32x4::new(3., 0., 0., 0.);
-        let e: f32 = 20.;
-        let r: f32 = transmute(vfmas_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcadd_rot270_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let e: f32x2 = f32x2::new(2., 0.);
+        let r: f32x2 = transmute(vcadd_rot270_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmad_lane_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 6.;
-        let c: f64 = 3.;
-        let e: f64 = 20.;
-        let r: f64 = transmute(vfmad_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcaddq_rot270_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+        let r: f32x4 = transmute(vcaddq_rot270_f32(transmute(a), transmute(b)));
        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmad_laneq_f64() {
-        let a: f64 = 2.;
-        let b: f64 = 6.;
-        let c: f64x2 = f64x2::new(3., 0.);
-        let e: f64 = 20.;
-        let r: f64 = transmute(vfmad_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcaddq_rot270_f64() {
+        let a: f64x2 = f64x2::new(1., -1.);
+        let b: f64x2 = f64x2::new(-1., 1.);
+        let e: f64x2 = f64x2::new(2., 0.);
+        let r: f64x2 = transmute(vcaddq_rot270_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_f64() {
-        let a: f64 = 20.0;
-        let b: f64 = 6.0;
-        let c: f64 = 2.0;
-        let e: f64 = 8.0;
-        let r: f64 = transmute(vfms_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcadd_rot90_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let e: f32x2 = f32x2::new(0., -2.);
+        let r: f32x2 = transmute(vcadd_rot90_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_f64() {
-        let a: f64x2 = f64x2::new(20.0, 30.0);
-        let b: f64x2 = f64x2::new(6.0, 4.0);
-        let c: f64x2 = f64x2::new(2.0, 3.0);
-        let e: f64x2 = f64x2::new(8.0, 18.0);
-        let r: f64x2 = transmute(vfmsq_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcaddq_rot90_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+        let r: f32x4 = transmute(vcaddq_rot90_f32(transmute(a), transmute(b)));
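+        // FCADD #90: each result pair is (a.re - b.im, a.im + b.re), here (0., -2.).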
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_n_f64() {
-        let a: f64 = 50.0;
-        let b: f64 = 6.0;
-        let c: f64 = 8.0;
-        let e: f64 = 2.0;
-        let r: f64 = transmute(vfms_n_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcaddq_rot90_f64() {
+        let a: f64x2 = f64x2::new(1., -1.);
+        let b: f64x2 = f64x2::new(-1., 1.);
+        let e: f64x2 = f64x2::new(0., -2.);
+        let r: f64x2 = transmute(vcaddq_rot90_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_n_f64() {
-        let a: f64x2 = f64x2::new(50.0, 35.0);
-        let b: f64x2 = f64x2::new(6.0, 4.0);
-        let c: f64 = 8.0;
-        let e: f64x2 = f64x2::new(2.0, 3.0);
-        let r: f64x2 = transmute(vfmsq_n_f64(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmla_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(0., -2.);
+        let r: f32x2 = transmute(vcmla_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_lane_f32() {
-        let a: f32x2 = f32x2::new(14., 11.);
-        let b: f32x2 = f32x2::new(6., 4.);
-        let c: f32x2 = f32x2::new(2., 0.);
-        let e: f32x2 = f32x2::new(2., 3.);
-        let r: f32x2 = transmute(vfms_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x4 = f32x4::new(0., -2., 2., 0.);
+        let r: f32x4 = transmute(vcmlaq_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_laneq_f32() {
-        let a: f32x2 = f32x2::new(14., 11.);
-        let b: f32x2 = f32x2::new(6., 4.);
-        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x2 = f32x2::new(2., 3.);
-        let r: f32x2 = transmute(vfms_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_f64() {
+        let a: f64x2 = f64x2::new(1., -1.);
+        let b: f64x2 = f64x2::new(-1., 1.);
+        let c: f64x2 = f64x2::new(1., 1.);
+        let e: f64x2 = f64x2::new(0., -2.);
+        let r: f64x2 = transmute(vcmlaq_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_lane_f32() {
-        let a: f32x4 = f32x4::new(14., 11., 18., 21.);
-        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
-        let c: f32x2 = f32x2::new(2., 0.);
-        let e: f32x4 = f32x4::new(2., 3., 4., 5.);
-        let r: f32x4 = transmute(vfmsq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmla_rot90_f32() {
+        let a: f32x2 = f32x2::new(1., 1.);
+        let b: f32x2 = f32x2::new(1., -1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(2., 0.);
+        let r: f32x2 = transmute(vcmla_rot90_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_laneq_f32() {
-        let a: f32x4 = f32x4::new(14., 11., 18., 21.);
-        let b: f32x4 = f32x4::new(6., 4., 7., 8.);
-        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x4 = f32x4::new(2., 3., 4., 5.);
-        let r: f32x4 = transmute(vfmsq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot90_f32() {
+        let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot90_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_lane_f64() {
-        let a: f64 = 14.;
-        let b: f64 = 6.;
-        let c: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vfms_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot90_f64() {
+        let a: f64x2 = f64x2::new(1., 1.);
+        let b: f64x2 = f64x2::new(1., -1.);
+        let c: f64x2 = f64x2::new(1., 1.);
+        let e: f64x2 = f64x2::new(2., 0.);
+        let r: f64x2 = transmute(vcmlaq_rot90_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfms_laneq_f64() {
-        let a: f64 = 14.;
-        let b: f64 = 6.;
-        let c: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 2.;
-        let r: f64 = transmute(vfms_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmla_rot180_f32() {
+        let a: f32x2 = f32x2::new(1., 1.);
+        let b: f32x2 = f32x2::new(1., -1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(0., 0.);
+        let r: f32x2 = transmute(vcmla_rot180_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_lane_f64() {
-        let a: f64x2 = f64x2::new(14., 11.);
-        let b: f64x2 = f64x2::new(6., 4.);
-        let c: f64 = 2.;
-        let e: f64x2 = f64x2::new(2., 3.);
-        let r: f64x2 = transmute(vfmsq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot180_f32() {
+        let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot180_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsq_laneq_f64() {
-        let a: f64x2 = f64x2::new(14., 11.);
-        let b: f64x2 = f64x2::new(6., 4.);
-        let c: f64x2 = f64x2::new(2., 0.);
-        let e: f64x2 = f64x2::new(2., 3.);
-        let r: f64x2 = transmute(vfmsq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot180_f64() {
+        let a: f64x2 = f64x2::new(1., 1.);
+        let b: f64x2 = f64x2::new(1., -1.);
+        let c: f64x2 = f64x2::new(1., 1.);
+        let e: f64x2 = f64x2::new(0., 0.);
+        let r: f64x2 = transmute(vcmlaq_rot180_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmss_lane_f32() {
-        let a: f32 = 14.;
-        let b: f32 = 6.;
-        let c: f32x2 = f32x2::new(2., 0.);
-        let e: f32 = 2.;
-        let r: f32 = transmute(vfmss_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmla_rot270_f32() {
+        let a: f32x2 = f32x2::new(1., 1.);
+        let b: f32x2 = f32x2::new(1., -1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(0., 2.);
+        let r: f32x2 = transmute(vcmla_rot270_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmss_laneq_f32() {
-        let a: f32 = 14.;
-        let b: f32 = 6.;
-        let c: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32 = 2.;
-        let r: f32 = transmute(vfmss_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot270_f32() {
+        let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let e: f32x4 = f32x4::new(0., 2., 0., 2.);
+        let r: f32x4 = transmute(vcmlaq_rot270_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsd_lane_f64() {
-        let a: f64 = 14.;
-        let b: f64 = 6.;
-        let c: f64 = 2.;
-        let e: f64 = 2.;
-        let r: f64 = transmute(vfmsd_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmlaq_rot270_f64() {
+        let a: f64x2 = f64x2::new(1., 1.);
+        let b: f64x2 = f64x2::new(1., -1.);
+        let c: f64x2 = f64x2::new(1., 1.);
+        let e: f64x2 = f64x2::new(0., 2.);
+        let r: f64x2 = transmute(vcmlaq_rot270_f64(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vfmsd_laneq_f64() {
-        let a: f64 = 14.;
-        let b: f64 = 6.;
-        let c: f64x2 = f64x2::new(2., 0.);
-        let e: f64 = 2.;
-        let r: f64 = transmute(vfmsd_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+    unsafe fn test_vcmla_lane_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(0., -2.);
+        let r: f32x2 = transmute(vcmla_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdiv_f32() {
-        let a: f32x2 = f32x2::new(2.0, 6.0);
-        let b: f32x2 = f32x2::new(1.0, 2.0);
-        let e: f32x2 = f32x2::new(2.0, 3.0);
-        let r: f32x2 = transmute(vdiv_f32(transmute(a), transmute(b)));
+    unsafe fn test_vcmla_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x2 = f32x2::new(0., -2.);
+        let r: f32x2 = transmute(vcmla_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdivq_f32() {
-        let a: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0);
-        let b: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0);
-        let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let r: f32x4 = transmute(vdivq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vcmlaq_lane_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+        let r: f32x4 = transmute(vcmlaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdiv_f64() {
-        let a: f64 = 2.0;
-        let b: f64 = 1.0;
-        let e: f64 = 2.0;
-        let r: f64 = transmute(vdiv_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcmlaq_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+        let r: f32x4 = transmute(vcmlaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdivq_f64() {
-        let a: f64x2 = f64x2::new(2.0, 6.0);
-        let b: f64x2 = f64x2::new(1.0, 2.0);
-        let e: f64x2 = f64x2::new(2.0, 3.0);
-        let r: f64x2 = transmute(vdivq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcmla_rot90_lane_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(0., 0.);
+        let r: f32x2 = transmute(vcmla_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 1.0;
-        let e: f64 = 0.0;
-        let r: f64 = transmute(vsub_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcmla_rot90_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x2 = f32x2::new(0., 0.);
+        let r: f32x2 = transmute(vcmla_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 4.0);
-        let b: f64x2 = f64x2::new(1.0, 2.0);
-        let e: f64x2 = f64x2::new(0.0, 2.0);
-        let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vcmlaq_rot90_lane_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let e: i32 = 10;
-        let r: i32 = transmute(vaddlv_s16(transmute(a)));
+    unsafe fn test_vcmlaq_rot90_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i32 = 36;
-        let r: i32 = transmute(vaddlvq_s16(transmute(a)));
+    unsafe fn test_vcmla_rot180_lane_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(2., 0.);
+        let r: f32x2 = transmute(vcmla_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let e: i64 = 3;
-        let r: i64 = transmute(vaddlv_s32(transmute(a)));
+    unsafe fn test_vcmla_rot180_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x2 = f32x2::new(2., 0.);
+        let r: f32x2 = transmute(vcmla_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let e: i64 = 10;
-        let r: i64 = transmute(vaddlvq_s32(transmute(a)));
+    unsafe fn test_vcmlaq_rot180_lane_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let e: u32 = 10;
-        let r: u32 = transmute(vaddlv_u16(transmute(a)));
+    unsafe fn test_vcmlaq_rot180_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+        let r: f32x4 = transmute(vcmlaq_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u32 = 36;
-        let r: u32 = transmute(vaddlvq_u16(transmute(a)));
+    unsafe fn test_vcmla_rot270_lane_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(2., -2.);
+        let r: f32x2 = transmute(vcmla_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let e: u64 = 3;
-        let r: u64 = transmute(vaddlv_u32(transmute(a)));
+    unsafe fn test_vcmla_rot270_laneq_f32() {
+        let a: f32x2 = f32x2::new(1., -1.);
+        let b: f32x2 = f32x2::new(-1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x2 = f32x2::new(2., -2.);
+        let r: f32x2 = transmute(vcmla_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u64 = 10;
-        let r: u64 = transmute(vaddlvq_u32(transmute(a)));
+    unsafe fn test_vcmlaq_rot270_lane_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x2 = f32x2::new(1., 1.);
+        let e: f32x4 = f32x4::new(2., -2., 2., -2.);
+        let r: f32x4 = transmute(vcmlaq_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_s8() {
-        let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16);
-        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b)));
+    unsafe fn test_vcmlaq_rot270_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+        let e: f32x4 = f32x4::new(2., -2., 2., -2.);
+        let r: f32x4 = transmute(vcmlaq_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_s16() {
-        let a: i32x4 = i32x4::new(8, 9, 10, 11);
-        let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
-        let e: i32x4 = i32x4::new(0, 0, 0, 0);
-        let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdot_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x2 = i32x2::new(31, 176);
+        let r: i32x2 = transmute(vdot_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_s32() {
-        let a: i64x2 = i64x2::new(8, 9);
-        let b: i32x4 = i32x4::new(6, 7, 8, 9);
-        let e: i64x2 = i64x2::new(0, 0);
-        let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdotq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 1, 2);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x4 = i32x4::new(31, 176, 31, 176);
+        let r: i32x4 = transmute(vdotq_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_u8() {
-        let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
-        let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdot_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x2 = u32x2::new(31, 176);
+        let r: u32x2 = transmute(vdot_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_u16() {
-        let a: u32x4 = u32x4::new(8, 9, 10, 11);
-        let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
-        let e: u32x4 = u32x4::new(0, 0, 0, 0);
-        let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdotq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 1, 2);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x4 = u32x4::new(31, 176, 31, 176);
+        let r: u32x4 = transmute(vdotq_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubw_high_u32() {
-        let a: u64x2 = u64x2::new(8, 9);
-        let b: u32x4 = u32x4::new(6, 7, 8, 9);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdot_lane_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x2 = i32x2::new(31, 72);
+        let r: i32x2 = transmute(vdot_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_s8() {
-        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
-        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
-        let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdot_laneq_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x2 = i32x2::new(31, 72);
+        let r: i32x2 = transmute(vdot_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_s16() {
-        let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
-        let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
-        let e: i32x4 = i32x4::new(4, 5, 6, 7);
-        let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdotq_lane_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 1, 2);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x4 = i32x4::new(31, 72, 31, 72);
+        let r: i32x4 = transmute(vdotq_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_s32() {
-        let a: i32x4 = i32x4::new(12, 13, 14, 15);
-        let b: i32x4 = i32x4::new(6, 6, 8, 8);
-        let e: i64x2 = i64x2::new(6, 7);
-        let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdotq_laneq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 1, 2);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i32x4 = i32x4::new(31, 72, 31, 72);
+        let r: i32x4 = transmute(vdotq_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_u8() {
-        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
-        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
-        let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdot_lane_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x2 = u32x2::new(31, 72);
+        let r: u32x2 = transmute(vdot_lane_u32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_u16() {
-        let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
-        let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
-        let e: u32x4 = u32x4::new(4, 5, 6, 7);
-        let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdot_laneq_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x2 = u32x2::new(31, 72);
+        let r: u32x2 = transmute(vdot_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubl_high_u32() {
-        let a: u32x4 = u32x4::new(12, 13, 14, 15);
-        let b: u32x4 = u32x4::new(6, 6, 8, 8);
-        let e: u64x2 = u64x2::new(6, 7);
-        let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdotq_lane_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 1, 2);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x4 = u32x4::new(31, 72, 31, 72);
+        let r: u32x4 = transmute(vdotq_lane_u32::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vdotq_laneq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 1, 2);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u32x4 = u32x4::new(31, 72, 31, 72);
+        let r: u32x4 = transmute(vdotq_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
@@ -16468,6 +19954,30 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxnmv_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vmaxnmv_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxnmvq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 0., 1.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vmaxnmvq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxnmvq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vmaxnmvq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vpmaxnm_f32() {
         let a: f32x2 = f32x2::new(1.0, 2.0);
@@ -16495,6 +20005,38 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpmaxnms_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vpmaxnms_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpmaxnmqd_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vpmaxnmqd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpmaxs_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 2.;
+        let r: f32 = transmute(vpmaxs_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpmaxqd_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 2.;
+        let r: f64 = transmute(vpmaxqd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmin_f64() {
         let a: f64 = 1.0;
@@ -16531,6 +20073,112 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminnmv_f32() {
+        let a: f32x2 = f32x2::new(1., 0.);
+        let e: f32 = 0.;
+        let r: f32 = transmute(vminnmv_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminnmvq_f32() {
+        let a: f32x4 = f32x4::new(1., 0., 2., 3.);
+        let e: f32 = 0.;
+        let r: f32 = transmute(vminnmvq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminnmvq_f64() {
+        let a: f64x2 = f64x2::new(1., 0.);
+        let e: f64 = 0.;
+        let r: f64 = transmute(vminnmvq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
+        let e: i16x8 = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
+        let r: i16x8 = transmute(vmovl_high_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
+        let e: i32x4 = i32x4::new(3, 4, 5, 6);
+        let r: i32x4 = transmute(vmovl_high_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i64x2 = i64x2::new(3, 4);
+        let r: i64x2 = transmute(vmovl_high_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
+        let e: u16x8 = u16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
+        let r: u16x8 = transmute(vmovl_high_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
+        let e: u32x4 = u32x4::new(3, 4, 5, 6);
+        let r: u32x4 = transmute(vmovl_high_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovl_high_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u64x2 = u64x2::new(3, 4);
+        let r: u64x2 = transmute(vmovl_high_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_f32() {
+        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+        let b: f32x4 = f32x4::new(3., 4., 5., 6.);
+        let e: f32x4 = f32x4::new(3., 7., 7., 11.);
+        let r: f32x4 = transmute(vpaddq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let b: f64x2 = f64x2::new(3., 4.);
+        let e: f64x2 = f64x2::new(3., 7.);
+        let r: f64x2 = transmute(vpaddq_f64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadds_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 3.;
+        let r: f32 = transmute(vpadds_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddd_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 3.;
+        let r: f64 = transmute(vpaddd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vpminnm_f32() {
         let a: f32x2 = f32x2::new(1.0, 2.0);
@@ -16558,6 +20206,38 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpminnms_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 1.;
+        let r: f32 = transmute(vpminnms_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpminnmqd_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 1.;
+        let r: f64 = transmute(vpminnmqd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpmins_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let e: f32 = 1.;
+        let r: f32 = transmute(vpmins_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpminqd_f64() {
+        let a: f64x2 = f64x2::new(1., 2.);
+        let e: f64 = 1.;
+        let r: f64 = transmute(vpminqd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqdmullh_s16() {
         let a: i16 = 2;
@@ -16802,6 +20482,66 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlalh_s16() {
+        let a: i32 = 1;
+        let b: i16 = 1;
+        let c: i16 = 2;
+        let e: i32 = 5;
+        let r: i32 = transmute(vqdmlalh_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlals_s32() {
+        let a: i64 = 1;
+        let b: i32 = 1;
+        let c: i32 = 2;
+        let e: i64 = 5;
+        let r: i64 = transmute(vqdmlals_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlalh_lane_s16() {
+        let a: i32 = 1;
+        let b: i16 = 1;
+        let c: i16x4 = i16x4::new(2, 1, 1, 1);
+        let e: i32 = 5;
+        let r: i32 = transmute(vqdmlalh_lane_s16::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlalh_laneq_s16() {
+        let a: i32 = 1;
+        let b: i16 = 1;
+        let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+        let e: i32 = 5;
+        let r: i32 = transmute(vqdmlalh_laneq_s16::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlals_lane_s32() {
+        let a: i64 = 1;
+        let b: i32 = 1;
+        let c: i32x2 = i32x2::new(2, 1);
+        let e: i64 = 5;
+        let r: i64 = transmute(vqdmlals_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlals_laneq_s32() {
+        let a: i64 = 1;
+        let b: i32 = 1;
+        let c: i32x4 = i32x4::new(2, 1, 1, 1);
+        let e: i64 = 5;
+        let r: i64 = transmute(vqdmlals_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqdmlsl_high_s16() {
         let a: i32x4 = i32x4::new(39, 58, 81, 108);
@@ -16902,6 +20642,66 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlslh_s16() {
+        let a: i32 = 10;
+        let b: i16 = 1;
+        let c: i16 = 2;
+        let e: i32 = 6;
+        let r: i32 = transmute(vqdmlslh_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsls_s32() {
+        let a: i64 = 10;
+        let b: i32 = 1;
+        let c: i32 = 2;
+        let e: i64 = 6;
+        let r: i64 = transmute(vqdmlsls_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlslh_lane_s16() {
+        let a: i32 = 10;
+        let b: i16 = 1;
+        let c: i16x4 = i16x4::new(2, 1, 1, 1);
+        let e: i32 = 6;
+        let r: i32 = transmute(vqdmlslh_lane_s16::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlslh_laneq_s16() {
+        let a: i32 = 10;
+        let b: i16 = 1;
+        let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+        let e: i32 = 6;
+        let r: i32 = transmute(vqdmlslh_laneq_s16::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsls_lane_s32() {
+        let a: i64 = 10;
+        let b: i32 = 1;
+        let c: i32x2 = i32x2::new(2, 1);
+        let e: i64 = 6;
+        let r: i64 = transmute(vqdmlsls_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsls_laneq_s32() {
+        let a: i64 = 10;
+        let b: i32 = 1;
+        let c: i32x4 = i32x4::new(2, 1, 1, 1);
+        let e: i64 = 6;
+        let r: i64 = transmute(vqdmlsls_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqdmulhh_s16() {
         let a: i16 = 1;
@@ -16956,6 +20756,42 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_lane_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(2, 1, 1, 1);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vqdmulh_lane_s16::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_lane_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(2, 1, 1, 1);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vqdmulhq_lane_s16::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_lane_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 1);
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vqdmulh_lane_s32::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_lane_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 1);
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vqdmulhq_lane_s32::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqmovnh_s16() {
         let a: i16 = 1;
@@ -17797,6 +21633,42 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsqaddb_u8() {
+        let a: u8 = 2;
+        let b: i8 = 2;
+        let e: u8 = 4;
+        let r: u8 = transmute(vsqaddb_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsqaddh_u16() {
+        let a: u16 = 2;
+        let b: i16 = 2;
+        let e: u16 = 4;
+        let r: u16 = transmute(vsqaddh_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsqadds_u32() {
+        let a: u32 = 2;
+        let b: i32 = 2;
+        let e: u32 = 4;
+        let r: u32 = transmute(vsqadds_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsqaddd_u64() {
+        let a: u64 = 2;
+        let b: i64 = 2;
+        let e: u64 = 4;
+        let r: u64 = transmute(vsqaddd_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vsqrt_f32() {
         let a: f32x2 = f32x2::new(4.0,
9.0); @@ -17845,6 +21717,58 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrtes_f32() { + let a: f32 = 1.0; + let e: f32 = 0.998046875; + let r: f32 = transmute(vrsqrtes_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrted_f64() { + let a: f64 = 1.0; + let e: f64 = 0.998046875; + let r: f64 = transmute(vrsqrted_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrts_f64() { + let a: f64 = 1.0; + let b: f64 = 1.0; + let e: f64 = 1.; + let r: f64 = transmute(vrsqrts_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrtsq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(1.0, 2.0); + let e: f64x2 = f64x2::new(1., -0.5); + let r: f64x2 = transmute(vrsqrtsq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrtss_f32() { + let a: f32 = 1.0; + let b: f32 = 1.0; + let e: f32 = 1.; + let r: f32 = transmute(vrsqrtss_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrtsd_f64() { + let a: f64 = 1.0; + let b: f64 = 1.0; + let e: f64 = 1.; + let r: f64 = transmute(vrsqrtsd_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vrecpe_f64() { let a: f64 = 4.0; @@ -17861,6 +21785,74 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrecpes_f32() { + let a: f32 = 4.0; + let e: f32 = 0.24951171875; + let r: f32 = transmute(vrecpes_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecped_f64() { + let a: f64 = 4.0; + let e: f64 = 0.24951171875; + let r: f64 = transmute(vrecped_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecps_f64() { + let a: f64 = 4.0; + let b: f64 = 4.0; + let e: f64 = -14.; + let r: f64 = transmute(vrecps_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpsq_f64() { + let a: f64x2 = f64x2::new(4.0, 3.0); + let b: f64x2 = f64x2::new(4.0, 3.0); + let e: f64x2 = f64x2::new(-14., -7.); + let r: f64x2 = transmute(vrecpsq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpss_f32() { + let a: f32 = 4.0; + let b: f32 = 4.0; + let e: f32 = -14.; + let r: f32 = transmute(vrecpss_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpsd_f64() { + let a: f64 = 4.0; + let b: f64 = 4.0; + let e: f64 = -14.; + let r: f64 = transmute(vrecpsd_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpxs_f32() { + let a: f32 = 4.0; + let e: f32 = 0.5; + let r: f32 = transmute(vrecpxs_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpxd_f64() { + let a: f64 = 4.0; + let e: f64 = 0.5; + let r: f64 = transmute(vrecpxd_f64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_s64_p64() { let a: i64x1 = i64x1::new(0); @@ -18703,6 +22695,66 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_high_s16() { + let a: i8x8 = i8x8::new(1, 2, 0, 0, 0, 0, 0, 0); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let c: i16x8 = 
i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i8x16 = i8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x16 = transmute(vrsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsubhn_high_s32() {
+        let a: i16x4 = i16x4::new(1, 2, 0, 0);
+        let b: i32x4 = i32x4::new(1, 2, 3, 4);
+        let c: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i16x8 = i16x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vrsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsubhn_high_s64() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i64x2 = i64x2::new(1, 2);
+        let c: i64x2 = i64x2::new(1, 2);
+        let e: i32x4 = i32x4::new(1, 2, 0, 0);
+        let r: i32x4 = transmute(vrsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsubhn_high_u16() {
+        let a: u8x8 = u8x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let c: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x16 = u8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x16 = transmute(vrsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsubhn_high_u32() {
+        let a: u16x4 = u16x4::new(1, 2, 0, 0);
+        let b: u32x4 = u32x4::new(1, 2, 3, 4);
+        let c: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u16x8 = u16x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vrsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsubhn_high_u64() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u64x2 = u64x2::new(1, 2);
+        let c: u64x2 = u64x2::new(1, 2);
+        let e: u32x4 = u32x4::new(1, 2, 0, 0);
+        let r: u32x4 = transmute(vrsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vset_lane_f64() {
         let a: f64 = 1.;
@@ -20104,4 +24156,36 @@ mod test {
         let r: i64x2 = transmute(vqabsq_s64(transmute(a)));
         assert_eq!(r, e);
     }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqabsb_s8() {
+        let a: i8 = -7;
+        let e: i8 = 7;
+        let r: i8 = transmute(vqabsb_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqabsh_s16() {
+        let a: i16 = -7;
+        let e: i16 = 7;
+        let r: i16 = transmute(vqabsh_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqabss_s32() {
+        let a: i32 = -7;
+        let e: i32 = 7;
+        let r: i32 = transmute(vqabss_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqabsd_s64() {
+        let a: i64 = -7;
+        let e: i64 = 7;
+        let r: i64 = transmute(vqabsd_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
 }
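For reference, the only input where the new scalar saturating-absolute intrinsics differ from a plain absolute value is the type minimum, which the tests above do not exercise. A minimal illustrative sketch, not part of the patch (assuming the `i8 -> i8` scalar signature the tests imply):

#[cfg(target_arch = "aarch64")]
unsafe fn qabs_demo() {
    use core::arch::aarch64::vqabsb_s8;
    assert_eq!(vqabsb_s8(-7), 7);
    // A wrapping negation would give i8::MIN back; SQABS saturates to i8::MAX.
    assert_eq!(vqabsb_s8(i8::MIN), i8::MAX);
}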
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 2f8cd1283f..82d76bd203 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -2985,7 +2985,24 @@ pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x
     static_assert_imm4!(N);
     transmute(vsliq_n_s16_(transmute(a), transmute(b), N))
 }
-
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N: i32 where N >= 0 && N <= 63);
+    transmute(vsli_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N: i32 where N >= 0 && N <= 63);
+    transmute(vsliq_n_s64_(transmute(a), transmute(b), N))
+}
 /// Shift Right and Insert (immediate)
 #[inline]
 #[target_feature(enable = "neon")]
@@ -3166,6 +3183,24 @@ pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x
     static_assert!(N: i32 where N >= 1 && N <= 16);
     transmute(vsriq_n_s16_(transmute(a), transmute(b), N))
 }
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N: i32 where N >= 1 && N <= 64);
+    transmute(vsri_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N: i32 where N >= 1 && N <= 64);
+    transmute(vsriq_n_s64_(transmute(a), transmute(b), N))
+}
 
 #[cfg(test)]
 mod tests {
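The two directions differ only in which destination bits survive: SLI keeps the low N bits of `a` and overwrites the rest with `b << N`; SRI keeps the high N bits of `a` and overwrites the rest with `b >> N`. An illustrative scalar model of the new p64 lanes, not part of the patch (restricted to shifts strictly inside the lane, since a full 64-bit shift overflows in plain Rust):

// Model of vsli_n_p64::<N>(a, b), valid for 0 <= n < 64.
fn sli64(a: u64, b: u64, n: u32) -> u64 {
    let written = u64::MAX << n;                 // bit positions produced by b << n
    (a & !written) | ((b << n) & written)
}

// Model of vsri_n_p64::<N>(a, b), valid for 1 <= n < 64 (n = 64 simply returns a).
fn sri64(a: u64, b: u64, n: u32) -> u64 {
    let written = u64::MAX >> n;                 // bit positions produced by b >> n
    (a & !written) | ((b >> n) & written)
}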
diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs
index 2903e46330..a0ad92c33a 100644
--- a/crates/core_arch/src/arm/neon.rs
+++ b/crates/core_arch/src/arm/neon.rs
@@ -1047,7 +1047,32 @@ pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x
         int16x8_t(n, n, n, n, n, n, n, n),
     ))
 }
-
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,v7,aes")]
+#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N : i32 where 0 <= N && N <= 63);
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t(N as i64),
+    ))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,v7,aes")]
+#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N : i32 where 0 <= N && N <= 63);
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t(N as i64, N as i64),
+    ))
+}
 /// Shift Right and Insert (immediate)
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -1292,6 +1317,32 @@ pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x
         int16x8_t(n, n, n, n, n, n, n, n),
     ))
 }
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,v7,aes")]
+#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N : i32 where 1 <= N && N <= 64);
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t(-N as i64),
+    ))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,v7,aes")]
+#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N : i32 where 1 <= N && N <= 64);
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t(-N as i64, -N as i64),
+    ))
+}
 
 #[cfg(test)]
 mod tests {
diff --git a/crates/core_arch/src/arm_shared/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs
index 56b99088bf..3e9515e59e 100644
--- a/crates/core_arch/src/arm_shared/crypto.rs
+++ b/crates/core_arch/src/arm_shared/crypto.rs
@@ -51,8 +51,6 @@ extern "unadjusted" {
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-// TODO: Use AES for ARM when the minimum LLVM version includes b8baa2a9132498ea286dbb0d03f005760ecc6fdb
-
 /// AES single round encryption.
 #[inline]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "aes"))]
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index 95972bd33c..27d76cbc86 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -5378,6 +5378,84 @@ pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 vhaddq_s32_(a, b)
 }
 
+/// Rounding add returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))]
+pub unsafe fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v8i8")]
+        fn vraddhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+vraddhn_s16_(a, b)
+}
+
+/// Rounding add returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))]
+pub unsafe fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v4i16")]
+        fn vraddhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+vraddhn_s32_(a, b)
+}
+
+/// Rounding add returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))]
+pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v2i32")]
+        fn vraddhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+vraddhn_s64_(a, b)
+}
+
+/// Rounding add returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))]
+pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    transmute(vraddhn_s16(transmute(a), transmute(b)))
+}
+
+/// Rounding add returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + transmute(vraddhn_s32(transmute(a), transmute(b))) +} + +/// Rounding add returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + transmute(vraddhn_s64(transmute(a), transmute(b))) +} + /// Rounding halving add #[inline] #[target_feature(enable = "neon")] @@ -14608,6 +14686,76 @@ pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { simd_sub(a, b) } +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vaddq_p128(a: p128, b: p128) -> p128 { + a ^ b +} + /// Subtract returning high narrow #[inline] #[target_feature(enable = "neon")] @@ -15576,6 +15724,22 @@ 
pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 vminnmq_f32_(a, b)
 }
 
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(faddp))]
+pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v2f32")]
+        fn vpadd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+vpadd_f32_(a, b)
+}
+
 /// Signed saturating doubling multiply long
 #[inline]
 #[target_feature(enable = "neon")]
@@ -15890,6 +16054,54 @@ pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
 vqdmulhq_s32(a, b)
 }
 
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32)))
+}
+
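Per lane, SQDMULH is the high half of the doubled product, saturated; the `_laneq` forms above simply broadcast lane `LANE` of `b` first. A rough scalar model of the i16 case, illustrative only and not part of the patch:

fn sqdmulh16(a: i16, b: i16) -> i16 {
    // Double the product in a wide type, then keep the high 16 bits, saturating.
    let wide = 2 * (a as i64) * (b as i64);
    (wide >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

The clamp only fires for i16::MIN * i16::MIN, where 2 * (-32768)^2 >> 16 would otherwise wrap; this is consistent with the 0x7F_FF vectors in the lane tests earlier.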
 /// Signed saturating extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
@@ -17826,6 +18038,70 @@ pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
 vrsqrteq_f32_(a)
 }
 
+/// Unsigned reciprocal square root estimate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursqrte))]
+pub unsafe fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ursqrte.v2i32")]
+        fn vrsqrte_u32_(a: uint32x2_t) -> uint32x2_t;
+    }
+vrsqrte_u32_(a)
+}
+
+/// Unsigned reciprocal square root estimate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursqrte))]
+pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ursqrte.v4i32")]
+        fn vrsqrteq_u32_(a: uint32x4_t) -> uint32x4_t;
+    }
+vrsqrteq_u32_(a)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrts))]
+pub unsafe fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v2f32")]
+        fn vrsqrts_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+vrsqrts_f32_(a, b)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrts))]
+pub unsafe fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v4f32")]
+        fn vrsqrtsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+vrsqrtsq_f32_(a, b)
+}
+
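FRSQRTS computes (3 - a*b) / 2, which is exactly the correction factor for one Newton-Raphson step on 1/sqrt(x); FRSQRTE supplies the rough starting estimate (the 0.998046875 expected by the tests earlier in this patch). An illustrative refinement loop over the new scalar forms, not part of the patch:

#[cfg(target_arch = "aarch64")]
unsafe fn rsqrt_f64(x: f64) -> f64 {
    use core::arch::aarch64::{vrsqrted_f64, vrsqrtsd_f64};
    let mut e = vrsqrted_f64(x);        // ~8-bit estimate of 1/sqrt(x)
    for _ in 0..3 {
        e *= vrsqrtsd_f64(x * e, e);    // e *= (3 - x*e*e) / 2
    }
    e
}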
 /// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -17858,6 +18134,70 @@ pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
 vrecpeq_f32_(a)
 }
 
+/// Unsigned reciprocal estimate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urecpe))]
+pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urecpe.v2i32")]
+        fn vrecpe_u32_(a: uint32x2_t) -> uint32x2_t;
+    }
+vrecpe_u32_(a)
+}
+
+/// Unsigned reciprocal estimate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urecpe))]
+pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urecpe.v4i32")]
+        fn vrecpeq_u32_(a: uint32x4_t) -> uint32x4_t;
+    }
+vrecpeq_u32_(a)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecps))]
+pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v2f32")]
+        fn vrecps_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+vrecps_f32_(a, b)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecps))]
+pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v4f32")]
+        fn vrecpsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+vrecpsq_f32_(a, b)
+}
+
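FRECPS is the reciprocal counterpart: it computes 2 - a*b, the Newton-Raphson factor for 1/x (hence the -14 expected by the vrecps tests, since 2 - 4*4 = -14). A matching illustrative sketch, not part of the patch:

#[cfg(target_arch = "aarch64")]
unsafe fn recip_f32(x: f32) -> f32 {
    use core::arch::aarch64::{vrecpes_f32, vrecpss_f32};
    let mut e = vrecpes_f32(x);         // ~8-bit estimate of 1/x
    for _ in 0..2 {
        e *= vrecpss_f32(x, e);         // e *= 2 - x*e
    }
    e
}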
 /// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
@@ -20830,6 +21170,84 @@ pub unsafe fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64
     simd_add(a, vrshrq_n_u64::<N>(b))
 }
 
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v8i8")]
+        fn vrsubhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+vrsubhn_s16_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v4i16")]
+        fn vrsubhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+vrsubhn_s32_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v2i32")]
+        fn vrsubhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+vrsubhn_s64_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    transmute(vrsubhn_s16(transmute(a), transmute(b)))
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    transmute(vrsubhn_s32(transmute(a), transmute(b)))
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    transmute(vrsubhn_s64(transmute(a), transmute(b)))
+}
+
 /// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
@@ -22094,40 +22512,688 @@ pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x
     simd_add(a, vshrq_n_u64::<N>(b))
 }
 
-/// Unsigned Absolute difference and Accumulate Long
+/// Transpose elements
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
-pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    let d: uint8x8_t = vabd_u8(b, c);
-    simd_add(a, simd_cast(d))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))]
+pub unsafe fn vtrn_s8(a: 
int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a1: int8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unsigned Absolute difference and Accumulate Long +/// Transpose elements #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] -pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - let d: uint16x4_t = vabd_u16(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a1: int16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unsigned Absolute difference and Accumulate Long +/// Transpose elements #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] -pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - let d: uint32x2_t = vabd_u32(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a1: int8x16_t = simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: int8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) } -/// Signed Absolute difference and Accumulate Long +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a1: int16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a1: int32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a1: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a1: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a1: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: uint8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a1: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a1: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a1: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a1: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a1: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: poly8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 
9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a1: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a1: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a1: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a1: float32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +pub unsafe fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a1: float32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a0: int8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a0: int16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: int8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: uint8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: poly8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))]
+pub unsafe fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+    let a0: float32x2_t = simd_shuffle2!(a, b, [0, 2]);
+    let b0: float32x2_t = simd_shuffle2!(a, b, [1, 3]);
+    transmute((a0, b0))
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))]
+pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+    let a0: float32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]);
+    let b0: float32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]);
+    transmute((a0, b0))
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))]
+pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+    let a0: int8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+    let b0: int8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+    transmute((a0, b0))
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))]
+pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+    let a0: int16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]);
+    let b0: int16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]);
+    transmute((a0, b0))
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))]
+pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+    let a0: int8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]);
+    let b0: int8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]);
+    transmute((a0, b0))
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))]
+pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+    let a0: int16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+    let b0: int16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+    transmute((a0, b0))
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))]
+pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+    let a0: int32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]);
+    let b0: int32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]);
+    transmute((a0, b0))
+}
+
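The three permute families added here differ only in their shuffle pattern, which is easiest to see on 4-lane inputs. An illustrative demo, not part of the patch:

#[cfg(target_arch = "aarch64")]
unsafe fn permute_demo() {
    use core::arch::aarch64::*;
    let a = vld1_s16([0i16, 1, 2, 3].as_ptr());
    let b = vld1_s16([4i16, 5, 6, 7].as_ptr());
    let _trn = vtrn_s16(a, b); // ([0, 4, 2, 6], [1, 5, 3, 7]): pairwise even/odd interleave
    let _zip = vzip_s16(a, b); // ([0, 4, 1, 5], [2, 6, 3, 7]): interleave low then high halves
    let _uzp = vuzp_s16(a, b); // ([0, 2, 4, 6], [1, 3, 5, 7]): split into even and odd lanes
}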
target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); + let b0: uint8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); + let b0: poly8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +pub unsafe fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a0: float32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +pub unsafe fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a0: float32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + let d: uint8x8_t = vabd_u8(b, c); + simd_add(a, simd_cast(d)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(uabal))] +pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + let d: uint16x4_t = vabd_u16(b, c); + simd_add(a, simd_cast(d)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + let d: uint32x2_t = vabd_u32(b, c); + simd_add(a, simd_cast(d)) +} + +/// Signed Absolute difference and Accumulate Long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -23997,7 +25063,7 @@ mod test { unsafe fn test_vcagt_f32() { let a: f32x2 = f32x2::new(-1.2, 0.0); let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let e: u32x2 = u32x2::new(!0, 0); let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -24006,7 +25072,7 @@ mod test { unsafe fn test_vcagtq_f32() { let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let e: u32x4 = u32x4::new(!0, 0, 0xFF_FF_FF_FF, 0); let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -24015,7 +25081,7 @@ mod test { unsafe fn test_vcage_f32() { let a: f32x2 = f32x2::new(-1.2, 0.0); let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(!0, 0xFF_FF_FF_FF); let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -24024,7 +25090,7 @@ mod test { unsafe fn test_vcageq_f32() { let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); + let e: u32x4 = u32x4::new(!0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -26409,6 +27475,60 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s16() { + let a: i16x8 = i16x8::new(0x7F_FF, -32768, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(-128, -128, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vraddhn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s32() { + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 3, 4); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(-32768, -32768, 0, 0); + let r: i16x4 = transmute(vraddhn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s64() { + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let b: i64x2 = i64x2::new(1, 2); + let e: i32x2 = i32x2::new(-2147483648, -2147483648); + let r: i32x2 = transmute(vraddhn_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u16() { + let a: u16x8 = u16x8::new(0xFF_FF, 0, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vraddhn_u16(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u32() { + let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 3, 4); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0, 0, 0, 0); + let r: u16x4 = transmute(vraddhn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u64() { + let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let b: u64x2 = u64x2::new(1, 2); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vraddhn_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vrhadd_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); @@ -31329,6 +32449,69 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_p8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e: i8x8 = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let r: i8x8 = transmute(vadd_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_p16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(1, 1, 1, 1); + let e: i16x4 = i16x4::new(0, 3, 2, 5); + let r: i16x4 = transmute(vadd_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_p8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e: i8x16 = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17); + let r: i8x16 = transmute(vaddq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_p16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e: i16x8 = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let r: i16x8 = transmute(vaddq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_p64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vadd_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_p64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(0, 3); + let r: i64x2 = transmute(vaddq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_p128() { + let a: p128 = 16; + let b: p128 = 1; + let e: p128 = 17; + let r: p128 = transmute(vaddq_p128(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vsubhn_s16() { let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1); @@ -31947,6 +33130,15 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(3., 4.); + let e: f32x2 = f32x2::new(3., 7.); + let r: f32x2 = transmute(vpadd_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqdmull_s16() { let a: i16x4 = i16x4::new(0, 1, 2, 3); @@ -32193,6 +33385,42 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqdmulhq_laneq_s16() { + let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 
0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); + let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vqdmulhq_laneq_s16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqdmulh_laneq_s16() { + let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); + let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vqdmulh_laneq_s16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqdmulhq_laneq_s32() { + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); + let b: i32x4 = i32x4::new(2, 1, 1, 1); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vqdmulhq_laneq_s32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqdmulh_laneq_s32() { + let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); + let b: i32x4 = i32x4::new(2, 1, 1, 1); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vqdmulh_laneq_s32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqmovn_s16() { let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); @@ -33225,6 +34453,40 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrte_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(4294967295, 4294967295); + let r: u32x2 = transmute(vrsqrte_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrteq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295); + let r: u32x4 = transmute(vrsqrteq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrts_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(1.0, 2.0); + let e: f32x2 = f32x2::new(1., -0.5); + let r: f32x2 = transmute(vrsqrts_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrtsq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e: f32x4 = f32x4::new(1., -0.5, -3.0, -6.5); + let r: f32x4 = transmute(vrsqrtsq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vrecpe_f32() { let a: f32x2 = f32x2::new(4.0, 3.0); @@ -33241,6 +34503,40 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrecpe_u32() { + let a: u32x2 = u32x2::new(4, 3); + let e: u32x2 = u32x2::new(4294967295, 4294967295); + let r: u32x2 = transmute(vrecpe_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpeq_u32() { + let a: u32x4 = u32x4::new(4, 3, 2, 1); + let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295); + let r: u32x4 = transmute(vrecpeq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecps_f32() { + let a: f32x2 = f32x2::new(4.0, 3.0); + let b: f32x2 = f32x2::new(4.0, 3.0); + let e: f32x2 = f32x2::new(-14., -7.); + let r: f32x2 = transmute(vrecps_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrecpsq_f32() { + let a: 
f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0); + let b: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0); + let e: f32x4 = f32x4::new(-14., -7., -2., 1.); + let r: f32x4 = transmute(vrecpsq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vreinterpret_s8_u8() { let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); @@ -35430,38 +36726,92 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u32() { - let a: u32x2 = u32x2::new(1, 1); - let b: u32x2 = u32x2::new(4, 8); - let e: u32x2 = u32x2::new(2, 3); - let r: u32x2 = transmute(vrsra_n_u32::<2>(transmute(a), transmute(b))); + unsafe fn test_vrsra_n_u32() { + let a: u32x2 = u32x2::new(1, 1); + let b: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vrsra_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 1); + let b: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(2, 3, 4, 5); + let r: u32x4 = transmute(vrsraq_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vrsra_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u64() { + let a: u64x2 = u64x2::new(1, 1); + let b: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vrsraq_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s16() { + let a: i16x8 = i16x8::new(0x7F_FF, -32768, 0, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(-128, -128, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vrsubhn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s32() { + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 0, 4); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(-32768, -32768, 0, 0); + let r: i16x4 = transmute(vrsubhn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s64() { + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let b: i64x2 = i64x2::new(1, 2); + let e: i32x2 = i32x2::new(-2147483648, -2147483648); + let r: i32x2 = transmute(vrsubhn_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 1); - let b: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u32x4 = u32x4::new(2, 3, 4, 5); - let r: u32x4 = transmute(vrsraq_n_u32::<2>(transmute(a), transmute(b))); + unsafe fn test_vrsubhn_u16() { + let a: u16x8 = u16x8::new(0xFF_FF, 0, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vrsubhn_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u64() { - let a: u64x1 = u64x1::new(1); - let b: u64x1 = u64x1::new(4); - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vrsra_n_u64::<2>(transmute(a), transmute(b))); + unsafe fn test_vrsubhn_u32() { + let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 3, 4); + let b: u32x4 = 
u32x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0, 0, 0, 0); + let r: u16x4 = transmute(vrsubhn_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u64() { - let a: u64x2 = u64x2::new(1, 1); - let b: u64x2 = u64x2::new(4, 8); - let e: u64x2 = u64x2::new(2, 3); - let r: u64x2 = transmute(vrsraq_n_u64::<2>(transmute(a), transmute(b))); + unsafe fn test_vrsubhn_u64() { + let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let b: u64x2 = u64x2::new(1, 2); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vrsubhn_u64(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -36321,6 +37671,492 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s8() { + let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [i8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [i8; 16] = transmute(vtrn_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s16() { + let a: i16x4 = i16x4::new(0, 2, 2, 6); + let b: i16x4 = i16x4::new(1, 3, 3, 7); + let e: [i16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [i16; 8] = transmute(vtrn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s8() { + let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [i8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [i8; 32] = transmute(vtrnq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s16() { + let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [i16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [i16; 16] = transmute(vtrnq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s32() { + let a: i32x4 = i32x4::new(0, 2, 2, 6); + let b: i32x4 = i32x4::new(1, 3, 3, 7); + let e: [i32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [i32; 8] = transmute(vtrnq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u8() { + let a: u8x8 = u8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: u8x8 = u8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u8; 16] = transmute(vtrn_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u16() { + let a: u16x4 = u16x4::new(0, 2, 2, 6); + let b: u16x4 = u16x4::new(1, 3, 3, 7); + let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u16; 8] = transmute(vtrn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u8() { + let a: u8x16 = u8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: u8x16 = u8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [u8; 32] = transmute(vtrnq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u16() { + let 
a: u16x8 = u16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: u16x8 = u16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u16; 16] = transmute(vtrnq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u32() { + let a: u32x4 = u32x4::new(0, 2, 2, 6); + let b: u32x4 = u32x4::new(1, 3, 3, 7); + let e: [u32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u32; 8] = transmute(vtrnq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_p8() { + let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u8; 16] = transmute(vtrn_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_p16() { + let a: i16x4 = i16x4::new(0, 2, 2, 6); + let b: i16x4 = i16x4::new(1, 3, 3, 7); + let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u16; 8] = transmute(vtrn_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_p8() { + let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [u8; 32] = transmute(vtrnq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_p16() { + let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u16; 16] = transmute(vtrnq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s32() { + let a: i32x2 = i32x2::new(0, 2); + let b: i32x2 = i32x2::new(1, 3); + let e: [i32; 4] = [0, 1, 2, 3]; + let r: [i32; 4] = transmute(vtrn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u32() { + let a: u32x2 = u32x2::new(0, 2); + let b: u32x2 = u32x2::new(1, 3); + let e: [u32; 4] = [0, 1, 2, 3]; + let r: [u32; 4] = transmute(vtrn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_f32() { + let a: f32x2 = f32x2::new(0., 2.); + let b: f32x2 = f32x2::new(1., 3.); + let e: [f32; 4] = [0., 1., 2., 3.]; + let r: [f32; 4] = transmute(vtrn_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_f32() { + let a: f32x4 = f32x4::new(0., 2., 2., 6.); + let b: f32x4 = f32x4::new(1., 3., 3., 7.); + let e: [f32; 8] = [0., 1., 2., 3., 2., 3., 6., 7.]; + let r: [f32; 8] = transmute(vtrnq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [i8; 16] = transmute(vzip_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s16() { + let a: i16x4 = i16x4::new(0, 2, 4, 6); + let b: i16x4 = i16x4::new(1, 3, 5, 
7); + let e: [i16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [i16; 8] = transmute(vzip_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [i8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [i8; 32] = transmute(vzipq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s16() { + let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [i16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [i16; 16] = transmute(vzipq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s32() { + let a: i32x4 = i32x4::new(0, 2, 4, 6); + let b: i32x4 = i32x4::new(1, 3, 5, 7); + let e: [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [i32; 8] = transmute(vzipq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u8() { + let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u16() { + let a: u16x4 = u16x4::new(0, 2, 4, 6); + let b: u16x4 = u16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = transmute(vzip_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u8() { + let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [u8; 32] = transmute(vzipq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u16() { + let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u16; 16] = transmute(vzipq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u32() { + let a: u32x4 = u32x4::new(0, 2, 4, 6); + let b: u32x4 = u32x4::new(1, 3, 5, 7); + let e: [u32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u32; 8] = transmute(vzipq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p16() { + let a: i16x4 = i16x4::new(0, 2, 4, 6); + let b: i16x4 = i16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = 
transmute(vzip_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_p8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [u8; 32] = transmute(vzipq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_p16() { + let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u16; 16] = transmute(vzipq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s32() { + let a: i32x2 = i32x2::new(0, 2); + let b: i32x2 = i32x2::new(1, 3); + let e: [i32; 4] = [0, 1, 2, 3]; + let r: [i32; 4] = transmute(vzip_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u32() { + let a: u32x2 = u32x2::new(0, 2); + let b: u32x2 = u32x2::new(1, 3); + let e: [u32; 4] = [0, 1, 2, 3]; + let r: [u32; 4] = transmute(vzip_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(5., 6.); + let e: [f32; 4] = [1., 5., 2., 6.]; + let r: [f32; 4] = transmute(vzip_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32x4 = f32x4::new(5., 6., 7., 8.); + let e: [f32; 8] = [1., 5., 2., 6., 3., 7., 4., 8.]; + let r: [f32; 8] = transmute(vzipq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s8() { + let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [i8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [i8; 16] = transmute(vuzp_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s16() { + let a: i16x4 = i16x4::new(1, 2, 2, 3); + let b: i16x4 = i16x4::new(2, 3, 3, 8); + let e: [i16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [i16; 8] = transmute(vuzp_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_s8() { + let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); + let e: [i8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [i8; 32] = transmute(vuzpq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_s16() { + let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [i16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [i16; 16] = transmute(vuzpq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_s32() { + let a: i32x4 = i32x4::new(1, 2, 2, 3); + let b: i32x4 = i32x4::new(2, 3, 3, 8); + let e: 
[i32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [i32; 8] = transmute(vuzpq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u8() { + let a: u8x8 = u8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: u8x8 = u8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u8; 16] = transmute(vuzp_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u16() { + let a: u16x4 = u16x4::new(1, 2, 2, 3); + let b: u16x4 = u16x4::new(2, 3, 3, 8); + let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u16; 8] = transmute(vuzp_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u8() { + let a: u8x16 = u8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: u8x16 = u8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); + let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [u8; 32] = transmute(vuzpq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u16() { + let a: u16x8 = u16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: u16x8 = u16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u16; 16] = transmute(vuzpq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u32() { + let a: u32x4 = u32x4::new(1, 2, 2, 3); + let b: u32x4 = u32x4::new(2, 3, 3, 8); + let e: [u32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u32; 8] = transmute(vuzpq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_p8() { + let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u8; 16] = transmute(vuzp_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_p16() { + let a: i16x4 = i16x4::new(1, 2, 2, 3); + let b: i16x4 = i16x4::new(2, 3, 3, 8); + let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u16; 8] = transmute(vuzp_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_p8() { + let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); + let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [u8; 32] = transmute(vuzpq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_p16() { + let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u16; 16] = transmute(vuzpq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 3); + let e: [i32; 4] = [1, 2, 2, 3]; + let r: [i32; 4] = transmute(vuzp_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u32() { 
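+        // Worked example (illustrative): for 2-element vectors an "unzip" is
+        // just a transpose, so the expected result below is ([a0, b0], [a1, b1]).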
+ let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(2, 3); + let e: [u32; 4] = [1, 2, 2, 3]; + let r: [u32; 4] = transmute(vuzp_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(2., 6.); + let e: [f32; 4] = [1., 2., 2., 6.]; + let r: [f32; 4] = transmute(vuzp_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_f32() { + let a: f32x4 = f32x4::new(1., 2., 2., 4.); + let b: f32x4 = f32x4::new(2., 6., 6., 8.); + let e: [f32; 8] = [1., 2., 2., 6., 2., 4., 6., 8.]; + let r: [f32; 8] = transmute(vuzpq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vabal_u8() { let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index d3f88abd9b..a423c2e480 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -478,9 +478,6 @@ extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v8i8")] fn vpadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v16i8")] - fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] #[cfg_attr( @@ -3922,6 +3919,26 @@ unsafe fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { float32x2_t(value, value) } +/// Load SIMD&FP register (immediate offset) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vldrq_p128(a: *const p128) -> p128 { + *a +} + +/// Store SIMD&FP register (immediate offset) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { + *a = b; +} + /// Duplicate vector element to vector or scalar #[inline] #[target_feature(enable = "neon")] @@ -5817,6 +5834,23 @@ mod tests { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vldrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let r: p128 = vldrq_p128(v[1..].as_ptr()); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vstrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let mut r: p128 = 1; + vstrq_p128(&mut r, v[1]); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmov_n_s8() { let v: i8 = 64; diff --git a/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs index 125659c105..ebb8b7b9ef 100644 --- a/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs +++ b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs @@ -50,6 +50,8 @@ test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127 test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 
127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45)); +test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33)); macro_rules! test_vsri { ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => { @@ -87,3 +89,5 @@ test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127 test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45)); +test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33)); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 20f6c3d0fd..06d92af456 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -102,6 +102,19 @@ b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F generate int*_t, uint*_t, int64x*_t, uint64x*_t +/// Three-way exclusive OR +name = veor3 +multi_fn = simd_xor, {simd_xor, a, b}, c +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +c = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F + +// llvm does not currently support `eor3` instructions +aarch64 = nop +generate int8x16_t, int16x8_t, int32x4_t, int64x2_t +generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t + //////////////////// // Absolute difference between the arguments //////////////////// @@ -140,6 +153,16 @@ link-arm = vabds._EXT_ link-aarch64 = fabd._EXT_ generate float*_t +/// Floating-point absolute difference +name = vabd +multi_fn = simd_extract, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 1.0 +b = 9.0 +validate 8.0 + +aarch64 = fabd +generate f32, f64 + //////////////////// // Absolute difference Long //////////////////// @@ -303,9 +326,28 @@ aarch64 = fcmeq generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t arm = vceq. -// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Compare bitwise equal +name = vceq +multi_fn = transmute, {vceq-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 1 +b = 2 +validate 0 + +aarch64 = cmeq +generate i64:u64, u64 + +/// Floating-point compare equal +name = vceq +multi_fn = simd_extract, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 1. +b = 2. 
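+// Note (illustrative): the scalar form dups each operand into a vector,
+// compares, and extracts lane 0, so it yields a mask: !0 when equal, 0
+// otherwise. Here 1. != 2., so the expected value is 0.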
+validate 0 + +aarch64 = fcmeq +generate f32:u32, f64:u64 + /// Signed compare bitwise equal to zero name = vceqz fn = simd_eq @@ -336,6 +378,24 @@ validate TRUE, FALSE, FALSE, FALSE aarch64 = fcmeq generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Compare bitwise equal to zero +name = vceqz +multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}} +a = 1 +validate 0 + +aarch64 = cmeq +generate i64:u64, u64 + +/// Floating-point compare bitwise equal to zero +name = vceqz +multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = 1. +validate 0 + +aarch64 = fcmeq +generate f32:u32, f64:u64 + /// Signed compare bitwise Test bits nonzero name = vtst multi_fn = simd_and, c:in_t, a, b @@ -368,6 +428,40 @@ generate uint64x*_t arm = vtst generate uint*_t +/// Compare bitwise test bits nonzero +name = vtst +multi_fn = transmute, {vtst-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 0 +b = 0 +validate 0 + +aarch64 = cmtst +generate i64:i64:u64, u64 + +/// Signed saturating accumulate of unsigned value +name = vuqadd +out-suffix +a = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 +b = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 +validate 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8 + +aarch64 = suqadd +link-aarch64 = suqadd._EXT_ +generate int8x8_t:uint8x8_t:int8x8_t, int16x4_t:uint16x4_t:int16x4_t, int32x2_t:uint32x2_t:int32x2_t, int64x1_t:uint64x1_t:int64x1_t +generate int8x16_t:uint8x16_t:int8x16_t, int16x8_t:uint16x8_t:int16x8_t, int32x4_t:uint32x4_t:int32x4_t, int64x2_t:uint64x2_t:int64x2_t +generate i32:u32:i32, i64:u64:i64 + +/// Signed saturating accumulate of unsigned value +name = vuqadd +out-suffix +multi_fn = simd_extract, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 1 +b = 2 +validate 3 + +aarch64 = suqadd +generate i8:u8:i8, i16:u16:i16 + //////////////////// // Floating-point absolute value //////////////////// @@ -423,9 +517,30 @@ aarch64 = fcmgt generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t arm = vcgt.s -// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Compare greater than +name = vcgt +multi_fn = transmute, {vcgt-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 1 +b = 2 +validate 0 + +aarch64 = cmgt +generate i64:u64 +aarch64 = cmhi +generate u64 + +/// Floating-point compare greater than +name = vcgt +multi_fn = simd_extract, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 1. +b = 2. +validate 0 + +aarch64 = fcmgt +generate f32:u32, f64:u64 + //////////////////// // lesser then //////////////////// @@ -466,9 +581,30 @@ aarch64 = fcmgt generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t arm = vcgt.s -// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Compare less than +name = vclt +multi_fn = transmute, {vclt-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 2 +b = 1 +validate 0 + +aarch64 = cmgt +generate i64:u64 +aarch64 = cmhi +generate u64 + +/// Floating-point compare less than +name = vclt +multi_fn = simd_extract, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 2. +b = 1. 
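+// Note (illustrative): AArch64 has no register-register fcmlt encoding; a
+// less-than compare is expected to lower to fcmgt with the operands swapped,
+// hence the fcmgt assertion below.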
+validate 0 + +aarch64 = fcmgt +generate f32:u32, f64:u64 + //////////////////// // lesser then equals //////////////////// @@ -486,6 +622,26 @@ generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t arm = vcge.s generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t +/// Compare greater than or equal +name = vcge +multi_fn = transmute, {vcge-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 1 +b = 2 +validate 0 + +aarch64 = cmge +generate i64:u64, u64 + +/// Floating-point compare greater than or equal +name = vcge +multi_fn = simd_extract, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 1. +b = 2. +validate 0 + +aarch64 = fcmge +generate f32:u32, f64:u64 + /// Compare unsigned less than or equal name = vcle fn = simd_le a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 @@ -508,10 +664,31 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE aarch64 = fcmge generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t -// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t arm = vcge.s generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Compare less than or equal +name = vcle +multi_fn = transmute, {vcle-in_ntt-noext, {transmute, a}, {transmute, b}} +a = 2 +b = 1 +validate 0 + +aarch64 = cmge +generate i64:u64 +aarch64 = cmhs +generate u64 + +/// Floating-point compare less than or equal +name = vcle +multi_fn = simd_extract, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0 +a = 2. +b = 1. +validate 0 + +aarch64 = fcmge +generate f32:u32, f64:u64 + //////////////////// // greater then equals //////////////////// @@ -553,7 +730,6 @@ aarch64 = fcmge generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t arm = vcge.s -// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t /// Compare signed greater than or equal to zero name = vcgez fn = simd_ge a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX b = 0, 0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -576,6 +752,24 @@ validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE aarch64 = fcmge generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Compare signed greater than or equal to zero +name = vcgez +multi_fn = transmute, {vcgez-in_ntt-noext, {transmute, a}} +a = -1 +validate 0 + +aarch64 = cmge +generate i64:u64 + +/// Floating-point compare greater than or equal to zero +name = vcgez +multi_fn = simd_extract, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = -1. +validate 0 + +aarch64 = fcmge +generate f32:u32, f64:u64 + /// Compare signed greater than zero name = vcgtz fn = simd_gt a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX b = 0, 0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -596,6 +790,24 @@ validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE aarch64 = fcmgt generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Compare signed greater than zero +name = vcgtz +multi_fn = transmute, {vcgtz-in_ntt-noext, {transmute, a}} +a = -1 +validate 0 + +aarch64 = cmgt +generate i64:u64 + +/// Floating-point compare greater than zero +name = vcgtz +multi_fn = simd_extract, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = -1.
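+// Note (illustrative): fcmgt has a dedicated compare-against-#0.0 form, so
+// the scalar test needs no second operand; -1. > 0. is false, giving a 0 mask.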
+validate 0 + +aarch64 = fcmgt +generate f32:u32, f64:u64 + /// Compare signed less than or equal to zero name = vclez fn = simd_le @@ -616,6 +828,24 @@ validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE aarch64 = fcmle generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Compare less than or equal to zero +name = vclez +multi_fn = transmute, {vclez-in_ntt-noext, {transmute, a}} +a = 2 +validate 0 + +aarch64 = cmgt +generate i64:u64 + +/// Floating-point compare less than or equal to zero +name = vclez +multi_fn = simd_extract, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = 2. +validate 0 + +aarch64 = fcmle +generate f32:u32, f64:u64 + /// Compare signed less than zero name = vcltz fn = simd_lt @@ -636,6 +866,24 @@ validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE aarch64 = fcmlt generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Compare less than zero +name = vcltz +multi_fn = transmute, {vcltz-in_ntt-noext, {transmute, a}} +a = 2 +validate 0 + +aarch64 = sshr +generate i64:u64 + +/// Floating-point compare less than zero +name = vcltz +multi_fn = simd_extract, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = 2. +validate 0 + +aarch64 = fcmlt +generate f32:u32, f64:u64 + /// Count leading sign bits name = vcls a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX @@ -671,11 +919,11 @@ generate uint*_t name = vcagt a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 -validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE +validate !0, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE aarch64 = facgt link-aarch64 = facgt._EXT2_._EXT_ -generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64 arm = vacgt.s link-arm = vacgt._EXT2_._EXT_ @@ -685,11 +933,11 @@ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t name = vcage a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 -validate TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE +validate !0, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE aarch64 = facge link-aarch64 = facge._EXT2_._EXT_ -generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64 arm = vacge.s link-arm = vacge._EXT2_._EXT_ @@ -700,10 +948,10 @@ name = vcalt multi_fn = vcagt-self-noext, b, a a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 -validate FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE +validate 0, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE aarch64 = facgt -generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64 arm = vacgt.s generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t @@ -713,10 +961,10 @@ name = vcale multi_fn = vcage-self-noext , b, a a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 -validate FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE +validate 0, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE aarch64 = facge -generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64 arm = vacge.s generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t @@ -936,6 +1184,16 @@ aarch64 = fcvtxn link-aarch64 = fcvtxn._EXT2_._EXT_ 
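+// Note (illustrative): "rounding to odd" (von Neumann rounding) forces the
+// low result bit to 1 whenever the conversion is inexact, which avoids double
+// rounding if the f32 result is rounded again afterwards.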
generate float64x2_t:float32x2_t +/// Floating-point convert to lower precision narrow, rounding to odd +name = vcvtx +double-suffixes +multi_fn = simd_extract, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0 +a = -1.0 +validate -1.0 + +aarch64 = fcvtxn +generate f64:f32 + /// Floating-point convert to lower precision narrow, rounding to odd name = vcvtx_high noq-double-suffixes @@ -1795,6 +2053,15 @@ generate int64x*_t arm = vneg.s generate int*_t +/// Negate +name = vneg +multi_fn = -a +a = 1 +validate -1 + +aarch64 = neg +generate i64 + /// Negate name = vneg fn = simd_neg @@ -1820,6 +2087,15 @@ generate int64x*_t arm = vqneg.s generate int*_t +/// Signed saturating negate +name = vqneg +multi_fn = simd_extract, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = 1 +validate -1 + +aarch64 = sqneg +generate i8, i16, i32, i64 + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 @@ -1884,6 +2160,45 @@ link-aarch64 = shadd._EXT_ link-arm = vhadds._EXT_ generate int*_t +/// Rounding add returning high narrow +name = vraddhn +no-q +a = MAX, MIN, 3, 4, 5, 6, 7, 8 +b = 1, 2, 3, 4, 5, 6, 7, 8 +validate MIN, MIN, 0, 0, 0, 0, 0, 0 + +aarch64 = raddhn +link-aarch64 = raddhn._EXT2_ +arm = vraddhn +link-arm = vraddhn._EXT2_ +generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t + +/// Rounding add returning high narrow +name = vraddhn +no-q +multi_fn = transmute, {vraddhn-noqsigned-noext, {transmute, a}, {transmute, b}} +a = MAX, MIN, 3, 4, 5, 6, 7, 8 +b = 1, 2, 3, 4, 5, 6, 7, 8 +validate 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = raddhn +arm = vraddhn +generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t + +/// Rounding add returning high narrow +name = vraddhn_high +no-q +multi_fn = vraddhn-noqself-noext, x:in_t0, b, c +multi_fn = simd_shuffle-out_len-!, a, x, {asc-0-out_len} +a = 1, 2, 0, 0, 0, 0, 0, 0 +b = 1, 2, 3, 4, 5, 6, 7, 8 +c = 1, 2, 3, 4, 5, 6, 7, 8 +validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = raddhn2 +generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t +generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t + /// Reverse bit order name = vrbit a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 @@ -1954,6 +2269,15 @@ arm = vrintn link-arm = vrintn._EXT_ generate float*_t +/// Floating-point round to integral, to nearest with ties to even +name = vrndn +a = -1.5 +validate -2.0 + +aarch64 = frintn +link-aarch64 = llvm.roundeven._EXT_ +generate f32 + /// Floating-point round to integral, toward minus infinity name = vrndm a = -1.5, 0.5, 1.5, 2.5 @@ -3190,6 +3514,28 @@ link-arm = vst4lane._EXTpi8r_ const-arm = LANE generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void +/// Dot product index form with signed and unsigned integers +name = vsudot +out-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_dot-LANE +multi_fn = simd_shuffle-in_len-!, c:unsigned, c, c, {base-4-LANE} +multi_fn = vsudot-outlane-_, a, b, c +a = 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +n = 0 +validate 31, 72, 31, 72 +target = dotprod + +aarch64 = sudot +link-aarch64 = usdot._EXT2_._EXT4_:int32x2_t:int8x8_t:uint8x8_t:int32x2_t +// LLVM ERROR: Cannot select: intrinsic 
%llvm.aarch64.neon.usdot
+//generate int32x2_t:int8x8_t:uint8x8_t:int32x2_t, int32x2_t:int8x8_t:uint8x16_t:int32x2_t
+link-aarch64 = usdot._EXT2_._EXT4_:int32x4_t:int8x16_t:uint8x16_t:int32x4_t
+// LLVM ERROR: Cannot select: intrinsic %llvm.aarch64.neon.usdot
+//generate int32x4_t:int8x16_t:uint8x8_t:int32x4_t, int32x4_t:int8x16_t:uint8x16_t:int32x4_t
+
/// Multiply
name = vmul
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
@@ -3401,7 +3747,7 @@ target = aes
aarch64 = pmull
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
-// Because of the support status of llvm, vmull_p64 is currently only available on aarch64
+// Because of the support status of llvm, vmull_p64 is currently not available on arm
// arm = vmull
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x2_t
generate p64:p64:p128
@@ -3743,6 +4089,58 @@ generate float64x*_t
arm = vsub.
generate float*_t

+/// Subtract
+name = vsub
+multi_fn = a - b
+a = 3
+b = 2
+validate 1
+
+aarch64 = nop
+generate i64, u64
+
+/// Add
+name = vadd
+multi_fn = a + b
+a = 1
+b = 2
+validate 3
+
+aarch64 = nop
+generate i64, u64
+
+/// Bitwise exclusive OR
+name = vadd
+multi_fn = simd_xor, a, b
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+validate 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17
+
+aarch64 = nop
+arm = nop
+generate poly8x8_t, poly16x4_t, poly8x16_t, poly16x8_t, poly64x1_t, poly64x2_t
+
+/// Bitwise exclusive OR
+name = vaddq
+no-q
+multi_fn = a ^ b
+a = 16
+b = 1
+validate 17
+
+aarch64 = nop
+arm = nop
+generate p128
+
+/// Floating-point add across vector
+name = vaddv
+a = 1., 2., 0., 0.
+validate 3.
+
+aarch64 = faddv
+link-aarch64 = faddv._EXT2_._EXT_
+generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64
+
/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
@@ -4085,6 +4483,209 @@ validate 6, 7

aarch64 = usubl
generate uint32x4_t:uint32x4_t:uint64x2_t

+/// Bit clear and exclusive OR
+name = vbcax
+multi_fn = simd_xor, a, {vbic-self-noext, b, c}
+a = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+validate 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14
+
+// llvm does not currently support the `bcax` instruction
+aarch64 = nop
+generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
+generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
+
+/// Floating-point complex add
+name = vcadd_rot270
+no-q
+a = 1., -1., 1., -1.
+b = -1., 1., -1., 1.
+validate 2., 0., 2., 0.
+target = fcma
+
+aarch64 = fcadd
+link-aarch64 = vcadd.rot270._EXT_
+generate float32x2_t
+name = vcaddq_rot270
+generate float32x4_t, float64x2_t
+
+/// Floating-point complex add
+name = vcadd_rot90
+no-q
+a = 1., -1., 1., -1.
+b = -1., 1., -1., 1.
+validate 0., -2., 0., -2.
+target = fcma
+
+aarch64 = fcadd
+link-aarch64 = vcadd.rot90._EXT_
+generate float32x2_t
+name = vcaddq_rot90
+generate float32x4_t, float64x2_t
+
+/// Floating-point complex multiply accumulate
+name = vcmla
+a = 1., -1., 1., -1.
+b = -1., 1., -1., 1.
+c = 1., 1., -1., -1.
+validate 0., -2., 2., 0.
+target = fcma
+
+aarch64 = fcmla
+link-aarch64 = vcmla.rot0._EXT_
+generate float32x2_t, float32x4_t, float64x2_t
+
+/// Floating-point complex multiply accumulate
+name = vcmla_rot90
+rot-suffix
+a = 1., 1., 1., 1.
+b = 1., -1., 1., -1.
+c = 1., 1., 1., 1.
+validate 2., 0., 2., 0.
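+// A hand check of the expectation above, using the FCMLA rotation
+// definitions (the rotation is applied to the second source): for #90,
+// each complex pair gives out[2i] = a[2i] - b[2i+1]*c[2i+1] and
+// out[2i+1] = a[2i+1] + b[2i+1]*c[2i]. First pair: 1 - (-1)(1) = 2 and
+// 1 + (-1)(1) = 0; the second pair repeats it, giving 2., 0., 2., 0.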
+target = fcma + +aarch64 = fcmla +link-aarch64 = vcmla.rot90._EXT_ +generate float32x2_t, float32x4_t, float64x2_t + +/// Floating-point complex multiply accumulate +name = vcmla_rot180 +rot-suffix +a = 1., 1., 1., 1. +b = 1., -1., 1., -1. +c = 1., 1., 1., 1. +validate 0., 0., 0., 0. +target = fcma + +aarch64 = fcmla +link-aarch64 = vcmla.rot180._EXT_ +generate float32x2_t, float32x4_t, float64x2_t + +/// Floating-point complex multiply accumulate +name = vcmla_rot270 +rot-suffix +a = 1., 1., 1., 1. +b = 1., -1., 1., -1. +c = 1., 1., 1., 1. +validate 0., 2., 0., 2. +target = fcma + +aarch64 = fcmla +link-aarch64 = vcmla.rot270._EXT_ +generate float32x2_t, float32x4_t, float64x2_t + +/// Floating-point complex multiply accumulate +name = vcmla +in2-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_rot-LANE +multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE} +multi_fn = vcmla-self-noext, a, b, c +a = 1., -1., 1., -1. +b = -1., 1., -1., 1. +c = 1., 1., -1., -1. +n = 0 +validate 0., -2., 0., -2. +target = fcma + +aarch64 = fcmla +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t +generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + +/// Floating-point complex multiply accumulate +name = vcmla_rot90 +rot-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_rot-LANE +multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE} +multi_fn = vcmla_rot90-rot-noext, a, b, c +a = 1., -1., 1., -1. +b = -1., 1., -1., 1. +c = 1., 1., -1., -1. +n = 0 +validate 0., 0., 0., 0. +target = fcma + +aarch64 = fcmla +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t +generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + +/// Floating-point complex multiply accumulate +name = vcmla_rot180 +rot-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_rot-LANE +multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE} +multi_fn = vcmla_rot180-rot-noext, a, b, c +a = 1., -1., 1., -1. +b = -1., 1., -1., 1. +c = 1., 1., -1., -1. +n = 0 +validate 2., 0., 2., 0. +target = fcma + +aarch64 = fcmla +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t +generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + +/// Floating-point complex multiply accumulate +name = vcmla_rot270 +rot-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_rot-LANE +multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE} +multi_fn = vcmla_rot270-rot-noext, a, b, c +a = 1., -1., 1., -1. +b = -1., 1., -1., 1. +c = 1., 1., -1., -1. +n = 0 +validate 2., -2., 2., -2. 
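+// The lane variants broadcast one complex pair of c before the
+// accumulate: {base-2-LANE} expands to the shuffle indices
+// [2 * LANE, 2 * LANE + 1, ...] repeated across the vector, and the vdot
+// lane forms below use {base-4-LANE} the same way for groups of four
+// bytes. (With n = 0 the second vdot lane becomes
+// 2 + 5*1 + 6*2 + 7*3 + 8*4 = 72 instead of 2 + 5*5 + 6*6 + 7*7 + 8*8 = 176.)
+// A rough sketch of the expected expansion for the two-lane case, with
+// the macro spelling assumed from the generator:
+//
+// pub unsafe fn vcmla_rot270_lane_f32<const LANE: i32>(
+//     a: float32x2_t, b: float32x2_t, c: float32x2_t,
+// ) -> float32x2_t {
+//     let c: float32x2_t =
+//         simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+//     vcmla_rot270_f32(a, b, c)
+// }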
+target = fcma + +aarch64 = fcmla +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t +generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + +/// Dot product arithmetic +name = vdot +out-suffix +a = 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +validate 31, 176, 31, 176 +target = dotprod + +aarch64 = sdot +link-aarch64 = sdot._EXT_._EXT3_ +generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t + +aarch64 = udot +link-aarch64 = udot._EXT_._EXT3_ +generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t + +/// Dot product arithmetic +name = vdot +out-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_dot-LANE +multi_fn = simd_shuffle-in_len-!, c:in_t, c, c, {base-4-LANE} +multi_fn = vdot-out-noext, a, b, c +a = 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 +n = 0 +validate 31, 72, 31, 72 +target = dotprod + +aarch64 = sdot +generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x2_t:int8x8_t:int8x16_t:int32x2_t +generate int32x4_t:int8x16_t:int8x8_t:int32x4_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t + +aarch64 = udot +generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x2_t:uint8x8_t:uint8x16_t:uint32x2_t +generate uint32x4_t:uint8x16_t:uint8x8_t:uint32x4_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t + /// Maximum (vector) name = vmax a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4136,6 +4737,15 @@ link-arm = vmaxnm._EXT_ link-aarch64 = fmaxnm._EXT_ generate float*_t +/// Floating-point maximum number across vector +name = vmaxnmv +a = 1., 2., 0., 1. +validate 2. + +aarch64 = fmaxnmv +link-aarch64 = fmaxnmv._EXT2_._EXT_ +generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64 + /// Floating-point Maximum Number Pairwise (vector). name = vpmaxnm a = 1.0, 2.0 @@ -4154,6 +4764,30 @@ aarch64 = fmaxnmp link-aarch64 = fmaxnmp._EXT_ generate float32x4_t:float32x4_t:float32x4_t +/// Floating-point maximum number pairwise +name = vpmaxnm +out-suffix +a = 1., 2. +validate 2. + +aarch64 = fmaxnmp +link-aarch64 = fmaxnmv._EXT2_._EXT_ +generate float32x2_t:f32 +name = vpmaxnmq +generate float64x2_t:f64 + +/// Floating-point maximum pairwise +name = vpmax +out-suffix +a = 1., 2. +validate 2. + +aarch64 = fmaxp +link-aarch64 = fmaxv._EXT2_._EXT_ +generate float32x2_t:f32 +name = vpmaxq +generate float64x2_t:f64 + /// Minimum (vector) name = vmin a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4205,11 +4839,79 @@ link-arm = vminnm._EXT_ link-aarch64 = fminnm._EXT_ generate float*_t +/// Floating-point minimum number across vector +name = vminnmv +a = 1., 0., 2., 3. +validate 0. 
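+// The "NM" reductions follow IEEE-754 minNum/maxNum semantics, so a
+// quiet NaN lane loses to a numeric lane instead of propagating. A
+// hypothetical check against the name generated below:
+//
+// let v: float32x2_t = transmute([f32::NAN, 2.0f32]);
+// assert_eq!(vminnmv_f32(v), 2.0);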
+ +aarch64 = fminnmv +link-aarch64 = fminnmv._EXT2_._EXT_ +generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64 + +/// 8-bit integer matrix multiply-accumulate +name = vmmlaq +a = 1, 2, 3, 4 +b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +validate 1, 2, 3, 4 +target = i8mm + +aarch64 = smmla +link-aarch64 = smmla._EXT_._EXT3_ +// the feature `i8mm` is not valid for some target +//generate int32x4_t:int8x16_t:int8x16_t:int32x4_t + +aarch64 = ummla +link-aarch64 = ummla._EXT_._EXT3_ +// the feature `i8mm` is not valid for some target +//generate uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t + +/// Vector move +name = vmovl_high +no-q +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen} +multi_fn = vmovl-noqself-noext, a +a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10 +validate 3, 4, 5, 6, 7, 8, 9, 10 + +aarch64 = sshll2 +generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t + +aarch64 = ushll2 +generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t + +/// Floating-point add pairwise +name = vpadd +a = 1., 2., 3., 4. +b = 3., 4., 5., 6. +validate 3., 7., 7., 11. + +aarch64 = faddp +link-aarch64 = faddp._EXT_ +generate float32x4_t, float64x2_t + +arm = vpadd +link-arm = vpadd._EXT_ +generate float32x2_t + +/// Floating-point add pairwise +name = vpadd +out-suffix +multi_fn = simd_extract, a1:out_t, a, 0 +multi_fn = simd_extract, a2:out_t, a, 1 +multi_fn = a1 + a2 +a = 1., 2. +validate 3. + +aarch64 = nop +generate float32x2_t:f32, float64x2_t:f64 + /// Floating-point Minimum Number Pairwise (vector). name = vpminnm a = 1.0, 2.0 b = 6.0, -3.0 validate 1.0, -3.0 + aarch64 = fminnmp link-aarch64 = fminnmp._EXT_ generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t @@ -4223,6 +4925,30 @@ aarch64 = fminnmp link-aarch64 = fminnmp._EXT_ generate float32x4_t:float32x4_t:float32x4_t +/// Floating-point minimum number pairwise +name = vpminnm +out-suffix +a = 1., 2. +validate 1. + +aarch64 = fminnmp +link-aarch64 = fminnmv._EXT2_._EXT_ +generate float32x2_t:f32 +name = vpminnmq +generate float64x2_t:f64 + +/// Floating-point minimum pairwise +name = vpmin +out-suffix +a = 1., 2. +validate 1. 
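+// With only two lanes, "pairwise" and "across vector" reductions pick
+// the same elements, which is why these scalar vpmin/vpmax forms link to
+// fminv/fmaxv (and fminnmv/fmaxnmv) instead of a dedicated pairwise LLVM
+// intrinsic. A hypothetical equivalence check:
+//
+// let v: float32x2_t = transmute([1.0f32, 2.0f32]);
+// assert_eq!(vpmins_f32(v), vminv_f32(v)); // both reduce to 1.0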
+ +aarch64 = fminp +link-aarch64 = fminv._EXT2_._EXT_ +generate float32x2_t:f32 +name = vpminq +generate float64x2_t:f64 + /// Signed saturating doubling multiply long name = vqdmull a = 0, 1, 2, 3, 4, 5, 6, 7 @@ -4452,6 +5178,35 @@ validate 17, 22, 27, 32 aarch64 = sqdmlal2 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t +/// Signed saturating doubling multiply-add long +name = vqdmlal +multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c} +multi_fn = vqadd-out-noext, a, {simd_extract, x, 0} +a = 1 +b = 1 +c = 2 +validate 5 + +aarch64 = sqdmull +generate i32:i16:i16:i32, i64:i32:i32:i64 + +/// Signed saturating doubling multiply-add long +name = vqdmlalh_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vqdmlal-self-noext, a, b, {simd_extract, c, LANE as u32} +a = 1 +b = 1 +c = 2, 1, 1, 1, 1, 1, 1, 1 +n = 0 +validate 5 + +aarch64 = sqdmull +generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32 +name = vqdmlals_lane +generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64 + /// Signed saturating doubling multiply-subtract long name = vqdmlsl multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c} @@ -4534,6 +5289,35 @@ validate -1, -2, -3, -4 aarch64 = sqdmlsl2 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t +/// Signed saturating doubling multiply-subtract long +name = vqdmlsl +multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c} +multi_fn = vqsub-out-noext, a, {simd_extract, x, 0} +a = 10 +b = 1 +c = 2 +validate 6 + +aarch64 = sqdmull +generate i32:i16:i16:i32, i64:i32:i32:i64 + +/// Signed saturating doubling multiply-subtract long +name = vqdmlslh_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vqdmlsl-self-noext, a, b, {simd_extract, c, LANE as u32} +a = 10 +b = 1 +c = 2, 1, 1, 1, 1, 1, 1, 1 +n = 0 +validate 6 + +aarch64 = sqdmull +generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32 +name = vqdmlsls_lane +generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64 + /// Signed saturating doubling multiply returning high half name = vqdmulh a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX @@ -4612,6 +5396,24 @@ validate 1 aarch64 = sqdmulh generate i32:int32x2_t:i32, i32:int32x4_t:i32 +/// Vector saturating doubling multiply high by scalar +name = vqdmulh +lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract, b, LANE as u32}} +a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX +b = 2, 1, 1, 1, 1, 1, 1, 1 +n = 0 +validate 1, 1, 1, 1, 1, 1, 1, 1 + +aarch64 = sqdmulh +generate int16x4_t, int16x8_t:int16x4_t:int16x8_t +generate int32x2_t, int32x4_t:int32x2_t:int32x4_t +arm = vqdmulh +generate int16x8_t, int16x4_t:int16x8_t:int16x4_t +generate int32x4_t, int32x2_t:int32x4_t:int32x2_t + /// Signed saturating extract narrow name = vqmovn no-q @@ -5323,6 +6125,28 @@ validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = sqshrun2 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t +/// Unsigned saturating accumulate of signed value +name = vsqadd +out-suffix +multi_fn = simd_extract, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, 
{vdup_n-in_ntt-noext, b}}, 0 +a = 2 +b = 2 +validate 4 + +aarch64 = usqadd +generate u8:i8:u8, u16:i16:u16 + +/// Unsigned saturating accumulate of signed value +name = vsqadd +out-suffix +a = 2 +b = 2 +validate 4 + +aarch64 = usqadd +link-aarch64 = usqadd._EXT_ +generate u32:i32:u32, u64:i64:u64 + /// Calculates the square root of each lane. name = vsqrt fn = simd_fsqrt @@ -5339,10 +6163,35 @@ validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375 aarch64 = frsqrte link-aarch64 = frsqrte._EXT_ -generate float64x*_t +generate float64x*_t, f32, f64 + +arm = vrsqrte +link-arm = vrsqrte._EXT_ +generate float*_t + +/// Unsigned reciprocal square root estimate +name = vrsqrte +a = 1, 2, 3, 4 +validate 4294967295, 4294967295, 4294967295, 4294967295 +aarch64 = ursqrte +link-aarch64 = ursqrte._EXT_ arm = vrsqrte link-arm = vrsqrte._EXT_ +generate uint32x2_t, uint32x4_t + +/// Floating-point reciprocal square root step +name = vrsqrts +a = 1.0, 2.0, 3.0, 4.0 +b = 1.0, 2.0, 3.0, 4.0 +validate 1., -0.5, -3.0, -6.5 + +aarch64 = frsqrts +link-aarch64 = frsqrts._EXT_ +generate float64x*_t, f32, f64 + +arm = vrsqrts +link-arm = vrsqrts._EXT_ generate float*_t /// Reciprocal estimate. @@ -5352,12 +6201,46 @@ validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875 aarch64 = frecpe link-aarch64 = frecpe._EXT_ -generate float64x*_t +generate float64x*_t, f32, f64 arm = vrecpe link-arm = vrecpe._EXT_ generate float*_t +/// Unsigned reciprocal estimate +name = vrecpe +a = 4, 3, 2, 1 +validate 4294967295, 4294967295, 4294967295, 4294967295 + +aarch64 = urecpe +link-aarch64 = urecpe._EXT_ +arm = vrecpe +link-arm = vrecpe._EXT_ +generate uint32x2_t, uint32x4_t + +/// Floating-point reciprocal step +name = vrecps +a = 4.0, 3.0, 2.0, 1.0 +b = 4.0, 3.0, 2.0, 1.0 +validate -14., -7., -2., 1. 
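+// frecps computes the Newton-Raphson step 2 - a*b (chained with frecpe
+// it refines an estimate of 1/x), and frsqrts computes (3 - a*b) / 2.
+// That is where these expected values come from: for a = b = 4 the step
+// here is 2 - 16 = -14, and in the vrsqrts entry above a = b = 2 gives
+// (3 - 4) / 2 = -0.5.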
+ +aarch64 = frecps +link-aarch64 = frecps._EXT_ +generate float64x*_t, f32, f64 + +arm = vrecps +link-arm = vrecps._EXT_ +generate float*_t + +/// Floating-point reciprocal exponent +name = vrecpx +a = 4.0 +validate 0.5 + +aarch64 = frecpx +link-aarch64 = frecpx._EXT_ +generate f32, f64 + /// Vector reinterpret cast operation name = vreinterpret double-suffixes @@ -5730,6 +6613,45 @@ validate 2 aarch64 = ursra generate u64 +/// Rounding subtract returning high narrow +name = vrsubhn +no-q +a = MAX, MIN, 0, 4, 5, 6, 7, 8 +b = 1, 2, 3, 4, 5, 6, 7, 8 +validate MIN, MIN, 0, 0, 0, 0, 0, 0 + +aarch64 = rsubhn +link-aarch64 = rsubhn._EXT2_ +arm = vrsubhn +link-arm = vrsubhn._EXT2_ +generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t + +/// Rounding subtract returning high narrow +name = vrsubhn +no-q +multi_fn = transmute, {vrsubhn-noqsigned-noext, {transmute, a}, {transmute, b}} +a = MAX, MIN, 3, 4, 5, 6, 7, 8 +b = 1, 2, 3, 4, 5, 6, 7, 8 +validate 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = rsubhn +arm = vrsubhn +generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t + +/// Rounding subtract returning high narrow +name = vrsubhn_high +no-q +multi_fn = vrsubhn-noqself-noext, x:in_t0, b, c +multi_fn = simd_shuffle-out_len-!, a, x, {asc-0-out_len} +a = 1, 2, 0, 0, 0, 0, 0, 0 +b = 1, 2, 3, 4, 5, 6, 7, 8 +c = 1, 2, 3, 4, 5, 6, 7, 8 +validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = rsubhn2 +generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t +generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t + /// Insert vector element from another vector element name = vset_lane constn = LANE @@ -5975,6 +6897,38 @@ aarch64 = usra arm = vsra generate uint*_t, uint64x*_t +/// Transpose elements +name = vtrn +multi_fn = simd_shuffle-in_len-!, a1:in_t, a, b, {transpose-1-in_len} +multi_fn = simd_shuffle-in_len-!, b1:in_t, a, b, {transpose-2-in_len} +multi_fn = transmute, (a1, b1) +a = 0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30 +b = 1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31 +validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 + +aarch64 = trn +arm = vtrn +generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t +generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t +generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t +aarch64 = zip +generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t + +/// Transpose elements +name = vtrn +multi_fn = simd_shuffle-in_len-!, a1:in_t, a, b, {transpose-1-in_len} +multi_fn = simd_shuffle-in_len-!, b1:in_t, a, b, {transpose-2-in_len} +multi_fn = transmute, (a1, b1) +a = 0., 2., 2., 6. +b = 1., 3., 3., 7. +validate 0., 1., 2., 3., 2., 3., 6., 7. 
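+// The generated vtrn returns both half-transposes as a pair; a rough
+// sketch for the two-lane case (macro spelling assumed):
+//
+// pub unsafe fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+//     let a1: float32x2_t = simd_shuffle2!(a, b, [0, 2]);
+//     let b1: float32x2_t = simd_shuffle2!(a, b, [1, 3]);
+//     transmute((a1, b1))
+// }
+//
+// With two lanes TRN1/TRN2 select the same elements as ZIP1/ZIP2, which
+// is why the 64-bit wide cases below assert zip rather than trn.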
+ +aarch64 = zip +arm = vtrn +generate float32x2_t:float32x2_t:float32x2x2_t +aarch64 = trn +generate float32x4_t:float32x4_t:float32x4x2_t + /// Transpose vectors name = vtrn1 multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len} @@ -6027,6 +6981,35 @@ generate float32x4_t aarch64 = zip2 generate float32x2_t, float64x2_t +/// Zip vectors +name = vzip +multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {zip-1-in_len} +multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {zip-2-in_len} +multi_fn = transmute, (a0, b0) +a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + +aarch64 = zip +arm = vzip +generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t +generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t +generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t +generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t + +/// Zip vectors +name = vzip +multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {zip-1-in_len} +multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {zip-2-in_len} +multi_fn = transmute, (a0, b0) +a = 1., 2., 3., 4. +b = 5., 6., 7., 8. +validate 1., 5., 2., 6., 3., 7., 4., 8. + +aarch64 = zip +arm = vzip +generate float32x2_t:float32x2_t:float32x2x2_t, float32x4_t:float32x4_t:float32x4x2_t + /// Zip vectors name = vzip1 multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len} @@ -6067,6 +7050,40 @@ validate 8., 9., 10., 11., 12., 13., 14., 15. aarch64 = zip2 generate float32x2_t, float32x4_t, float64x2_t +/// Unzip vectors +name = vuzp +multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {unzip-1-in_len} +multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {unzip-2-in_len} +multi_fn = transmute, (a0, b0) +a = 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16 +b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32 +validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32 + +aarch64 = uzp +arm = vzup +generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t +generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t +generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t +aarch64 = zip +arm = vtrn +generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t + +/// Unzip vectors +name = vuzp +multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {unzip-1-in_len} +multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {unzip-2-in_len} +multi_fn = transmute, (a0, b0) +a = 1., 2., 2., 4. +b = 2., 6., 6., 8. +validate 1., 2., 2., 6., 2., 4., 6., 8. 
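+// The same two-lane degeneracy applies here: UZP1/UZP2 on two elements
+// pick indices [0, 2] and [1, 3], exactly like ZIP1/ZIP2 (and like the
+// Armv7 VTRN.32 pairing), so the float32x2_t case below asserts zip on
+// AArch64 and vtrn on ARM, while the four-lane case keeps the dedicated
+// unzip forms.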
+ +aarch64 = zip +arm = vtrn +generate float32x2_t:float32x2_t:float32x2x2_t +aarch64 = uzp +arm = vzup +generate float32x4_t:float32x4_t:float32x4x2_t + /// Unzip vectors name = vuzp1 multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len} @@ -6298,3 +7315,21 @@ validate MAX, 7 aarch64 = sqabs link-aarch64 = sqabs._EXT_ generate int64x*_t + +/// Signed saturating absolute value +name = vqabs +multi_fn = simd_extract, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 +a = -7 +validate 7 + +aarch64 = sqabs +generate i8:i8, i16:i16 + +/// Signed saturating absolute value +name = vqabs +a = -7 +validate 7 + +aarch64 = sqabs +link-aarch64 = sqabs._EXT_ +generate i32:i32, i64:i64 diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index ec76d5639b..99165d8706 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -96,9 +96,9 @@ fn type_bits(t: &str) -> usize { } } -fn type_exp_len(t: &str) -> usize { +fn type_exp_len(t: &str, base_len: usize) -> usize { let t = type_to_sub_type(t); - let len = type_len(&t); + let len = type_len(&t) / base_len; match len { 1 => 0, 2 => 1, @@ -242,6 +242,7 @@ fn type_to_suffix(t: &str) -> &str { "f64" => "d_f64", "p8" => "b_p8", "p16" => "h_p16", + "p128" => "q_p128", _ => panic!("unknown type: {}", t), } } @@ -328,35 +329,48 @@ fn type_to_noq_n_suffix(t: &str) -> &str { } } -fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String { +fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str, re_to_out: bool) -> String { let mut str = String::new(); let suf = type_to_suffix(out_t); if !suf.starts_with("_") { str.push_str(&suf[0..1]); } str.push_str("_lane"); - str.push_str(type_to_suffix(in_t)); + if !re_to_out { + str.push_str(type_to_suffix(in_t)); + } else { + if type_to_suffix(in_t).starts_with("q") { + str.push_str("q"); + }; + let suf2 = type_to_noq_suffix(out_t); + str.push_str(suf2); + } str } -fn type_to_signed(t: &String) -> String { +fn type_to_rot_suffix(c_name: &str, suf: &str) -> String { + let ns: Vec<_> = c_name.split('_').collect(); + assert_eq!(ns.len(), 2); + if suf.starts_with("q") { + format!("{}q_{}{}", ns[0], ns[1], &suf[1..]) + } else { + format!("{}{}", c_name, suf) + } +} + +fn type_to_signed(t: &str) -> String { let s = t.replace("uint", "int"); let s = s.replace("poly", "int"); s } -fn type_to_unsigned(t: &str) -> &str { - match t { - "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "uint8x8_t", - "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "uint8x16_t", - "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "uint16x4_t", - "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "uint16x8_t", - "int32x2_t" | "uint32x2_t" => "uint32x2_t", - "int32x4_t" | "uint32x4_t" => "uint32x4_t", - "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "uint64x1_t", - "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "uint64x2_t", - _ => panic!("unknown type: {}", t), +fn type_to_unsigned(t: &str) -> String { + if t.contains("uint") { + return t.to_string(); } + let s = t.replace("int", "uint"); + let s = s.replace("poly", "uint"); + s } fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String { @@ -411,6 +425,7 @@ fn type_to_noq_suffix(t: &str) -> &str { "poly8x8_t" | "poly8x16_t" => "_p8", "poly16x4_t" | "poly16x8_t" => "_p16", "poly64x1_t" | "poly64x2_t" | "p64" => "_p64", + "p128" => "_p128", _ => panic!("unknown type: {}", t), } } @@ -434,6 +449,9 @@ enum Suffix { Lane, In2, In2Lane, + OutLane, + Rot, + RotLane, } #[derive(Clone, Copy)] @@ -443,6 +461,9 @@ enum TargetFeature { Vfp4, FPArmV8, AES, + 
FCMA,
+    Dotprod,
+    I8MM,
 }
 
 #[derive(Clone, Copy)]
@@ -659,16 +680,30 @@ fn values(t: &str, vs: &[String]) -> String {
    } else if vs.len() == 1 && type_to_global_type(t) == "f64" {
        format!(": {} = {}", type_to_global_type(t), vs[0])
    } else {
-        format!(
-            ": {} = {}::new({})",
-            type_to_global_type(t),
-            type_to_global_type(t),
-            vs.iter()
-                .map(|v| map_val(type_to_global_type(t), v))
-                //.map(|v| format!("{}{}", v, type_to_native_type(t)))
-                .collect::<Vec<_>>()
-                .join(", ")
-        )
+        let s: Vec<_> = t.split('x').collect();
+        if s.len() == 3 {
+            format!(
+                ": [{}; {}] = [{}]",
+                type_to_native_type(t),
+                type_len(t),
+                vs.iter()
+                    .map(|v| map_val(type_to_global_type(t), v))
+                    //.map(|v| format!("{}{}", v, type_to_native_type(t)))
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            )
+        } else {
+            format!(
+                ": {} = {}::new({})",
+                type_to_global_type(t),
+                type_to_global_type(t),
+                vs.iter()
+                    .map(|v| map_val(type_to_global_type(t), v))
+                    //.map(|v| format!("{}{}", v, type_to_native_type(t)))
+                    .collect::<Vec<_>>()
+                    .join(", ")
            )
+        }
    }
 }
@@ -909,6 +944,7 @@ fn ext(s: &str, in_t: &[&str; 3], out_t: &str) -> String {
    s.replace("_EXT_", &type_to_ext(in_t[0], false, false, false))
        .replace("_EXT2_", &type_to_ext(out_t, false, false, false))
        .replace("_EXT3_", &type_to_ext(in_t[1], false, false, false))
+        .replace("_EXT4_", &type_to_ext(in_t[2], false, false, false))
        .replace("_EXTr3_", &type_to_ext(in_t[1], false, true, false))
        .replace("_EXTv2_", &type_to_ext(out_t, true, false, false))
        .replace("_EXTpi8_", &type_to_ext(in_t[1], false, false, true))
@@ -1003,9 +1039,24 @@ fn gen_aarch64(
            current_name,
            type_to_lane_suffix(&type_to_sub_type(in_t[1]))
        ),
-        Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
+        Lane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[1], false)
+        ),
        In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
-        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
+        In2Lane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[2], false)
+        ),
+        OutLane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[2], true)
+        ),
+        Rot => type_to_rot_suffix(current_name, type_to_suffix(out_t)),
+        RotLane => type_to_rot_suffix(current_name, &type_to_lane_suffixes(out_t, in_t[2], false)),
    };
    let current_target = match target {
        Default => "neon",
@@ -1013,6 +1064,9 @@ fn gen_aarch64(
        Vfp4 => "vfp4",
        FPArmV8 => "fp-armv8,v8",
        AES => "neon,aes",
+        FCMA => "neon,fcma",
+        Dotprod => "neon,dotprod",
+        I8MM => "neon,i8mm",
    };
    let current_fn = if let Some(current_fn) = current_fn.clone() {
        if link_aarch64.is_some() {
@@ -1545,6 +1599,10 @@ fn gen_test(
    } else {
        String::new()
    };
+    let r_type = match type_sub_len(out_t) {
+        1 => type_to_global_type(out_t).to_string(),
+        _ => format!("[{}; {}]", type_to_native_type(out_t), type_len(out_t)),
+    };
    let t = {
        match para_num {
            1 => {
@@ -1557,7 +1615,7 @@
 "#,
                    values(in_t[0], &a),
                    values(out_t, &e),
-                    type_to_global_type(out_t),
+                    r_type,
                    name,
                    const_value
                )
@@ -1574,7 +1632,7 @@
                    values(in_t[0], &a),
                    values(in_t[1], &b),
                    values(out_t, &e),
-                    type_to_global_type(out_t),
+                    r_type,
                    name,
                    const_value
                )
@@ -1593,7 +1651,7 @@
                    values(in_t[1], &b),
                    values(in_t[2], &c),
                    values(out_t, &e),
-                    type_to_global_type(out_t),
+                    r_type,
                    name,
                    const_value
                )
@@ -1686,9 +1744,24 @@ fn gen_arm(
            current_name,
            type_to_lane_suffix(&type_to_sub_type(in_t[1]))
        ),
-        Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
+        Lane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[1], false)
+        ),
        In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
-        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
+        In2Lane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[2], false)
+        ),
+        OutLane => format!(
+            "{}{}",
+            current_name,
+            type_to_lane_suffixes(out_t, in_t[2], true)
+        ),
+        Rot => type_to_rot_suffix(current_name, type_to_suffix(out_t)),
+        RotLane => type_to_rot_suffix(current_name, &type_to_lane_suffixes(out_t, in_t[2], false)),
    };
    let current_aarch64 = current_aarch64
        .clone()
@@ -1699,6 +1772,9 @@ fn gen_arm(
        Vfp4 => "neon",
        FPArmV8 => "neon",
        AES => "neon,aes",
+        FCMA => "neon,fcma",
+        Dotprod => "neon,dotprod",
+        I8MM => "neon,i8mm",
    };
    let current_target_arm = match target {
        Default => "v7",
@@ -1706,6 +1782,9 @@ fn gen_arm(
        Vfp4 => "vfp4",
        FPArmV8 => "fp-armv8,v8",
        AES => "aes,v8",
+        FCMA => "v8", // v8.3a
+        Dotprod => "v8", // v8.2a
+        I8MM => "v8", // v8.6a
    };
    let current_fn = if let Some(current_fn) = current_fn.clone() {
        if link_aarch64.is_some() || link_arm.is_some() {
@@ -2477,6 +2556,34 @@ fn get_call(
        };
        return asc(start, len);
    }
+    if fn_name.starts_with("base") {
+        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
+        assert_eq!(fn_format.len(), 3);
+        let mut s = format!("<const {}: i32> [", &fn_format[2]);
+        let base_len = fn_format[1].parse::<usize>().unwrap();
+        for i in 0..type_len(in_t[1]) / base_len {
+            for j in 0..base_len {
+                if i != 0 || j != 0 {
+                    s.push_str(", ");
+                }
+                s.push_str(&format!("{} * {} as u32", base_len, &fn_format[2]));
+                if j != 0 {
+                    s.push_str(&format!(" + {}", j));
+                }
+            }
+        }
+        s.push_str("]");
+        return s;
+    }
+    if fn_name.starts_with("as") {
+        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
+        assert_eq!(fn_format.len(), 3);
+        let t = match &*fn_format[2] {
+            "in_ttn" => type_to_native_type(in_t[1]),
+            _ => String::new(),
+        };
+        return format!("{} as {}", &fn_format[1], t);
+    }
    if fn_name.starts_with("ins") {
        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
        let n = n.unwrap();
@@ -2509,13 +2616,15 @@ fn get_call(
    if fn_name.starts_with("static_assert_imm") {
        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
        let len = match &*fn_format[1] {
-            "out_exp_len" => type_exp_len(out_t),
+            "out_exp_len" => type_exp_len(out_t, 1),
            "out_bits_exp_len" => type_bits_exp_len(out_t),
-            "in_exp_len" => type_exp_len(in_t[1]),
+            "in_exp_len" => type_exp_len(in_t[1], 1),
            "in_bits_exp_len" => type_bits_exp_len(in_t[1]),
-            "in0_exp_len" => type_exp_len(in_t[0]),
-            "in1_exp_len" => type_exp_len(in_t[1]),
-            "in2_exp_len" => type_exp_len(in_t[2]),
+            "in0_exp_len" => type_exp_len(in_t[0], 1),
+            "in1_exp_len" => type_exp_len(in_t[1], 1),
+            "in2_exp_len" => type_exp_len(in_t[2], 1),
+            "in2_rot" => type_exp_len(in_t[2], 2),
+            "in2_dot" => type_exp_len(in_t[2], 4),
            _ => 0,
        };
        if len == 0 {
@@ -2558,9 +2667,9 @@ fn get_call(
    if fn_name.starts_with("matchn") {
        let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
        let len = match &*fn_format[1] {
-            "out_exp_len" => type_exp_len(out_t),
-            "in_exp_len" => type_exp_len(in_t[1]),
-            "in0_exp_len" => type_exp_len(in_t[0]),
+            "out_exp_len" => type_exp_len(out_t, 1),
+            "in_exp_len" => type_exp_len(in_t[1], 1),
+            "in0_exp_len" => type_exp_len(in_t[0], 1),
            _ => 0,
        };
        let mut call = format!("match {} & 0b{} {{\n", &fn_format[2], "1".repeat(len));
@@ -2644,6 +2753,10 @@ fn
get_call( re = Some((re_params[0].clone(), in_t[1].to_string())); } else if re_params[1] == "in_t" { re = Some((re_params[0].clone(), in_t[1].to_string())); + } else if re_params[1] == "signed" { + re = Some((re_params[0].clone(), type_to_signed(in_t[1]))); + } else if re_params[1] == "unsigned" { + re = Some((re_params[0].clone(), type_to_unsigned(in_t[1]))); } else if re_params[1] == "in_t0" { re = Some((re_params[0].clone(), in_t[0].to_string())); } else if re_params[1] == "in_t1" { @@ -2664,6 +2777,11 @@ fn get_call( )); } else if re_params[1] == "out_ntt" { re = Some((re_params[0].clone(), native_type_to_type(out_t).to_string())); + } else if re_params[1] == "out_long_ntt" { + re = Some(( + re_params[0].clone(), + native_type_to_long_type(out_t).to_string(), + )); } else { re = Some((re_params[0].clone(), re_params[1].clone())); } @@ -2691,6 +2809,12 @@ fn get_call( }); return format!(r#"[{}]"#, &half[..half.len() - 2]); } + if fn_name == "a - b" { + return fn_name; + } + if fn_name == "-a" { + return fn_name; + } if fn_name.contains('-') { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); assert_eq!(fn_format.len(), 3); @@ -2715,7 +2839,9 @@ fn get_call( } else if fn_format[1] == "in2" { fn_name.push_str(type_to_suffix(in_t[2])); } else if fn_format[1] == "in2lane" { - fn_name.push_str(&type_to_lane_suffixes(out_t, in_t[2])); + fn_name.push_str(&type_to_lane_suffixes(out_t, in_t[2], false)); + } else if fn_format[1] == "outlane" { + fn_name.push_str(&type_to_lane_suffixes(out_t, in_t[2], true)); } else if fn_format[1] == "signed" { fn_name.push_str(type_to_suffix(&type_to_signed(&String::from(in_t[1])))); } else if fn_format[1] == "outsigned" { @@ -2741,7 +2867,7 @@ fn get_call( &String::from(in_t[1]), )))); } else if fn_format[1] == "unsigned" { - fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1]))); + fn_name.push_str(type_to_suffix(&type_to_unsigned(in_t[1]))); } else if fn_format[1] == "doubleself" { fn_name.push_str(&type_to_double_suffixes(out_t, in_t[1])); } else if fn_format[1] == "noq_doubleself" { @@ -2769,6 +2895,8 @@ fn get_call( fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1]))); } else if fn_format[1] == "out_ntt" { fn_name.push_str(type_to_suffix(native_type_to_type(out_t))); + } else if fn_format[1] == "rot" { + fn_name = type_to_rot_suffix(&fn_name, type_to_suffix(out_t)); } else { fn_name.push_str(&fn_format[1]); }; @@ -2976,6 +3104,12 @@ mod test { suffix = In2; } else if line.starts_with("in2-lane-suffixes") { suffix = In2Lane; + } else if line.starts_with("out-lane-suffixes") { + suffix = OutLane; + } else if line.starts_with("rot-suffix") { + suffix = Rot; + } else if line.starts_with("rot-lane-suffixes") { + suffix = RotLane; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { @@ -3010,6 +3144,9 @@ mod test { "vfp4" => Vfp4, "fp-armv8" => FPArmV8, "aes" => AES, + "fcma" => FCMA, + "dotprod" => Dotprod, + "i8mm" => I8MM, _ => Default, }, _ => Default, diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 9047a08ef7..bd894e0baa 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -352,6 +352,8 @@ fn verify_all_signatures() { "vsliq_n_p8", "vsli_n_p16", "vsliq_n_p16", + "vsli_n_p64", + "vsliq_n_p64", "vsri_n_s8", "vsriq_n_s8", "vsri_n_s16", @@ -372,6 +374,8 @@ fn verify_all_signatures() { "vsriq_n_p8", "vsri_n_p16", "vsriq_n_p16", + "vsri_n_p64", + 
"vsriq_n_p64", "__smulbb", "__smultb", "__smulbt", @@ -486,6 +490,75 @@ fn verify_all_signatures() { "vqrdmlsh_laneq_s32", "vqrdmlshq_lane_s32", "vqrdmlshq_laneq_s32", + "vcadd_rot270_f32", + "vcadd_rot90_f32", + "vcaddq_rot270_f32", + "vcaddq_rot270_f64", + "vcaddq_rot90_f32", + "vcaddq_rot90_f64", + "vcmla_f32", + "vcmlaq_f32", + "vcmlaq_f64", + "vcmla_rot90_f32", + "vcmlaq_rot90_f32", + "vcmlaq_rot90_f64", + "vcmla_rot180_f32", + "vcmlaq_rot180_f32", + "vcmlaq_rot180_f64", + "vcmla_rot270_f32", + "vcmlaq_rot270_f32", + "vcmlaq_rot270_f64", + "vcmla_lane_f32", + "vcmla_laneq_f32", + "vcmlaq_lane_f32", + "vcmlaq_laneq_f32", + "vcmla_rot90_lane_f32", + "vcmla_rot90_laneq_f32", + "vcmlaq_rot90_lane_f32", + "vcmlaq_rot90_laneq_f32", + "vcmla_rot180_lane_f32", + "vcmla_rot180_laneq_f32", + "vcmlaq_rot180_lane_f32", + "vcmlaq_rot180_laneq_f32", + "vcmla_rot270_lane_f32", + "vcmla_rot270_laneq_f32", + "vcmlaq_rot270_lane_f32", + "vcmlaq_rot270_laneq_f32", + "vdot_s32", + "vdot_u32", + "vdotq_s32", + "vdotq_u32", + "vdot_lane_s32", + "vdot_laneq_s32", + "vdotq_lane_s32", + "vdotq_laneq_s32", + "vdot_lane_u32", + "vdot_laneq_u32", + "vdotq_lane_u32", + "vdotq_laneq_u32", + "vbcaxq_s8", + "vbcaxq_s16", + "vbcaxq_s32", + "vbcaxq_s64", + "vbcaxq_u8", + "vbcaxq_u16", + "vbcaxq_u32", + "vbcaxq_u64", + "veor3q_s8", + "veor3q_s16", + "veor3q_s32", + "veor3q_s64", + "veor3q_u8", + "veor3q_u16", + "veor3q_u32", + "veor3q_u64", + "vadd_p8", + "vadd_p16", + "vadd_p64", + "vaddq_p8", + "vaddq_p16", + "vaddq_p64", + "vaddq_p128", "__dbg", ]; let arm = match map.get(rust.name) {