From 2873d809d18fff21217692bacc70bc3b58b5b6a2 Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Fri, 15 Oct 2021 11:02:16 +0800 Subject: [PATCH] correct assert_instr --- .../core_arch/src/aarch64/neon/generated.rs | 110 ++++---- .../src/arm_shared/neon/generated.rs | 264 +++++++++--------- crates/stdarch-gen/neon.spec | 93 +++--- 3 files changed, 237 insertions(+), 230 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index c1f277a909..41b386401b 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -245,7 +245,7 @@ pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { /// Compare bitwise equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmeq))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { transmute(vceq_s64(transmute(a), transmute(b))) } @@ -253,7 +253,7 @@ pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { /// Compare bitwise equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmeq))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { transmute(vceq_u64(transmute(a), transmute(b))) } @@ -261,7 +261,7 @@ pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { /// Floating-point compare equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmeq))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 { simd_extract(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } @@ -269,7 +269,7 @@ pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 { /// Floating-point compare equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmeq))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 { simd_extract(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } @@ -493,7 +493,7 @@ pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t { /// Compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmeq))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vceqzd_s64(a: i64) -> u64 { transmute(vceqz_s64(transmute(a))) } @@ -501,7 +501,7 @@ pub unsafe fn vceqzd_s64(a: i64) -> u64 { /// Compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmeq))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vceqzd_u64(a: u64) -> u64 { transmute(vceqz_u64(transmute(a))) } @@ -509,7 +509,7 @@ pub unsafe fn vceqzd_u64(a: u64) -> u64 { /// Floating-point compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmeq))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vceqzs_f32(a: f32) -> u32 { simd_extract(vceqz_f32(vdup_n_f32(a)), 0) } @@ -517,7 +517,7 @@ pub unsafe fn vceqzs_f32(a: f32) -> u32 { /// Floating-point compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmeq))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vceqzd_f64(a: f64) -> u64 { simd_extract(vceqz_f64(vdup_n_f64(a)), 0) } @@ -585,7 +585,7 @@ pub unsafe fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { /// Compare bitwise test bits nonzero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] +#[cfg_attr(test, assert_instr(tst))] pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 { transmute(vtst_s64(transmute(a), transmute(b))) 
} @@ -593,7 +593,7 @@ pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 { /// Compare bitwise test bits nonzero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] +#[cfg_attr(test, assert_instr(tst))] pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 { transmute(vtst_u64(transmute(a), transmute(b))) } @@ -707,7 +707,7 @@ pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { /// Compare greater than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { transmute(vcgt_s64(transmute(a), transmute(b))) } @@ -715,7 +715,7 @@ pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { /// Compare greater than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { transmute(vcgt_u64(transmute(a), transmute(b))) } @@ -723,7 +723,7 @@ pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { /// Floating-point compare greater than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { simd_extract(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } @@ -731,7 +731,7 @@ pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { /// Floating-point compare greater than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 { simd_extract(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } @@ -787,7 +787,7 @@ pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { /// Compare less than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { transmute(vclt_s64(transmute(a), transmute(b))) } @@ -795,7 +795,7 @@ pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { /// Compare less than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { transmute(vclt_u64(transmute(a), transmute(b))) } @@ -803,7 +803,7 @@ pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { /// Floating-point compare less than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { simd_extract(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } @@ -811,7 +811,7 @@ pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { /// Floating-point compare less than #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 { simd_extract(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } @@ -835,7 +835,7 @@ pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { /// Compare greater than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { transmute(vcge_s64(transmute(a), transmute(b))) } @@ -843,7 +843,7 @@ pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { /// Compare greater than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] 
pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { transmute(vcge_u64(transmute(a), transmute(b))) } @@ -851,7 +851,7 @@ pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { /// Floating-point compare greater than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { simd_extract(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } @@ -859,7 +859,7 @@ pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { /// Floating-point compare greater than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 { simd_extract(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } @@ -899,7 +899,7 @@ pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { /// Compare less than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { transmute(vcle_s64(transmute(a), transmute(b))) } @@ -907,7 +907,7 @@ pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { /// Compare less than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { transmute(vcle_u64(transmute(a), transmute(b))) } @@ -915,7 +915,7 @@ pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { /// Floating-point compare less than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { simd_extract(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } @@ -923,7 +923,7 @@ pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { /// Floating-point compare less than or equal #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 { simd_extract(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } @@ -1087,7 +1087,7 @@ pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t { /// Compare signed greater than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(eor))] pub unsafe fn vcgezd_s64(a: i64) -> u64 { transmute(vcgez_s64(transmute(a))) } @@ -1095,7 +1095,7 @@ pub unsafe fn vcgezd_s64(a: i64) -> u64 { /// Floating-point compare greater than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgezs_f32(a: f32) -> u32 { simd_extract(vcgez_f32(vdup_n_f32(a)), 0) } @@ -1103,7 +1103,7 @@ pub unsafe fn vcgezs_f32(a: f32) -> u32 { /// Floating-point compare greater than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgezd_f64(a: f64) -> u64 { simd_extract(vcgez_f64(vdup_n_f64(a)), 0) } @@ -1219,7 +1219,7 @@ pub unsafe fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t { /// Compare signed greater than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vcgtzd_s64(a: i64) -> u64 { transmute(vcgtz_s64(transmute(a))) } @@ -1227,7 +1227,7 @@ pub unsafe fn vcgtzd_s64(a: i64) -> u64 { /// Floating-point compare 
greater than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgtzs_f32(a: f32) -> u32 { simd_extract(vcgtz_f32(vdup_n_f32(a)), 0) } @@ -1235,7 +1235,7 @@ pub unsafe fn vcgtzs_f32(a: f32) -> u32 { /// Floating-point compare greater than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vcgtzd_f64(a: f64) -> u64 { simd_extract(vcgtz_f64(vdup_n_f64(a)), 0) } @@ -1351,7 +1351,7 @@ pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t { /// Compare less than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] pub unsafe fn vclezd_s64(a: i64) -> u64 { transmute(vclez_s64(transmute(a))) } @@ -1359,7 +1359,7 @@ pub unsafe fn vclezd_s64(a: i64) -> u64 { /// Floating-point compare less than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vclezs_f32(a: f32) -> u32 { simd_extract(vclez_f32(vdup_n_f32(a)), 0) } @@ -1367,7 +1367,7 @@ pub unsafe fn vclezs_f32(a: f32) -> u32 { /// Floating-point compare less than or equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(fcmp))] pub unsafe fn vclezd_f64(a: f64) -> u64 { simd_extract(vclez_f64(vdup_n_f64(a)), 0) } @@ -1499,7 +1499,7 @@ pub unsafe fn vcltzs_f32(a: f32) -> u32 { /// Floating-point compare less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmlt))] +#[cfg_attr(test, assert_instr(asr))] pub unsafe fn vcltzd_f64(a: f64) -> u64 { simd_extract(vcltz_f64(vdup_n_f64(a)), 0) } @@ -7502,7 +7502,7 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { /// Floating-point add across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(faddv))] +#[cfg_attr(test, assert_instr(faddp))] pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -7515,7 +7515,7 @@ pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 { /// Floating-point add across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(faddv))] +#[cfg_attr(test, assert_instr(faddp))] pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -7528,7 +7528,7 @@ pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 { /// Floating-point add across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(faddv))] +#[cfg_attr(test, assert_instr(faddp))] pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8437,7 +8437,7 @@ pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { /// Floating-point maximum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmv))] +#[cfg_attr(test, assert_instr(fmaxnmp))] pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8450,7 +8450,7 @@ pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 { /// Floating-point maximum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmv))] +#[cfg_attr(test, assert_instr(fmaxnmp))] pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { 
@@ -8463,7 +8463,7 @@ pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { /// Floating-point maximum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmv))] +#[cfg_attr(test, assert_instr(fmaxnmp))] pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8619,7 +8619,7 @@ pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { /// Floating-point minimum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmv))] +#[cfg_attr(test, assert_instr(fminnmp))] pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8632,7 +8632,7 @@ pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 { /// Floating-point minimum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmv))] +#[cfg_attr(test, assert_instr(fminnmp))] pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8645,7 +8645,7 @@ pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { /// Floating-point minimum number across vector #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmv))] +#[cfg_attr(test, assert_instr(fminnmp))] pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 { #[allow(improper_ctypes)] extern "unadjusted" { @@ -8658,7 +8658,7 @@ pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2))] +#[cfg_attr(test, assert_instr(sxtl2))] pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t { let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); vmovl_s8(a) @@ -8667,7 +8667,7 @@ pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2))] +#[cfg_attr(test, assert_instr(sxtl2))] pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t { let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); vmovl_s16(a) @@ -8676,7 +8676,7 @@ pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2))] +#[cfg_attr(test, assert_instr(sxtl2))] pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t { let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); vmovl_s32(a) @@ -8685,7 +8685,7 @@ pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2))] +#[cfg_attr(test, assert_instr(uxtl2))] pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t { let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); vmovl_u8(a) @@ -8694,7 +8694,7 @@ pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2))] +#[cfg_attr(test, assert_instr(uxtl2))] pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t { let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); vmovl_u16(a) @@ -8703,7 +8703,7 @@ pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t { /// Vector move #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2))] +#[cfg_attr(test, assert_instr(uxtl2))] pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t { let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); vmovl_u32(a) @@ -9136,7 +9136,7 
@@ pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 { /// Signed saturating doubling multiply-add long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_imm2!(LANE); @@ -9146,7 +9146,7 @@ pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) - /// Signed saturating doubling multiply-add long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_imm3!(LANE); @@ -9286,7 +9286,7 @@ pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 { /// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_imm2!(LANE); @@ -9296,7 +9296,7 @@ pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) - /// Signed saturating doubling multiply-subtract long #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_imm3!(LANE); diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 3f52544c37..b5457bcfb1 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -22680,9 +22680,9 @@ pub unsafe fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { - let a0: int8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); - let b0: int8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); +pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } @@ -22692,9 +22692,9 @@ pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { - let a0: int16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); +pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } @@ -22704,9 +22704,9 @@ pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { - let a0: int32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); - let b0: int32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); +pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } @@ -22716,9 +22716,9 @@ pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { - let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); +pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } @@ -22726,11 +22726,11 @@ pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { - let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); - let b0: uint16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); +pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); transmute((a0, b0)) } @@ -22738,11 +22738,11 @@ pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { - let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); - let b0: uint8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); +pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); transmute((a0, b0)) } @@ -22750,11 +22750,11 @@ pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { - let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: int8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); transmute((a0, b0)) } @@ -22762,11 +22762,11 @@ pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { - let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); - let b0: uint32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } @@ -22774,11 +22774,11 @@ pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { - let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: poly8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } @@ -22786,11 +22786,11 @@ pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { - let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); - let b0: poly16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: uint8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); transmute((a0, b0)) } @@ -22798,11 +22798,11 @@ pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn 
vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { - let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); - let b0: poly8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } @@ -22810,11 +22810,11 @@ pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { - let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: poly16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } @@ -22822,11 +22822,11 @@ pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { - let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: poly8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); transmute((a0, b0)) } @@ -22834,11 +22834,11 @@ pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] -pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { - let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } @@ -22846,7 +22846,7 @@ pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] pub unsafe fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { let a0: float32x2_t = simd_shuffle2!(a, b, [0, 2]); @@ -22858,8 +22858,8 @@ pub unsafe fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { let a0: float32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); let b0: float32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); @@ -22870,7 +22870,7 @@ pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { let a0: int8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -22882,7 +22882,7 @@ pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { let a0: int16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -22894,7 +22894,7 @@ pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { let a0: int8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); @@ -22906,7 +22906,7 @@ pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { let a0: int16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -22918,7 +22918,7 @@ pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] 
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { let a0: int32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -22930,7 +22930,7 @@ pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -22942,7 +22942,7 @@ pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -22954,7 +22954,7 @@ pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); @@ -22966,7 +22966,7 @@ pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -22978,7 +22978,7 @@ pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -22990,7 +22990,7 @@ pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -23002,7 +23002,7 @@ pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -23014,7 +23014,7 @@ pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); @@ -23026,7 +23026,7 @@ pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); @@ -23074,7 +23074,7 @@ pub unsafe fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzup))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] pub unsafe fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { let a0: float32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); @@ -37719,6 +37719,60 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u8() { + let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u16() { + let a: u16x4 = u16x4::new(0, 2, 4, 6); + let b: u16x4 = u16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = transmute(vzip_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p16() { + let a: i16x4 = i16x4::new(0, 2, 4, 6); + let b: i16x4 = i16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = transmute(vzip_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s32() { + let a: i32x2 = i32x2::new(0, 2); + let b: i32x2 = i32x2::new(1, 3); + let e: [i32; 4] = [0, 1, 2, 3]; + 
let r: [i32; 4] = transmute(vzip_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u32() { + let a: u32x2 = u32x2::new(0, 2); + let b: u32x2 = u32x2::new(1, 3); + let e: [u32; 4] = [0, 1, 2, 3]; + let r: [u32; 4] = transmute(vzip_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vzipq_s8() { let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); @@ -37746,24 +37800,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u8() { - let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u8; 16] = transmute(vzip_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u16() { - let a: u16x4 = u16x4::new(0, 2, 4, 6); - let b: u16x4 = u16x4::new(1, 3, 5, 7); - let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [u16; 8] = transmute(vzip_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vzipq_u8() { let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); @@ -37791,24 +37827,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vzip_p8() { - let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u8; 16] = transmute(vzip_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_p16() { - let a: i16x4 = i16x4::new(0, 2, 4, 6); - let b: i16x4 = i16x4::new(1, 3, 5, 7); - let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [u16; 8] = transmute(vzip_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vzipq_p8() { let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); @@ -37827,24 +37845,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vzip_s32() { - let a: i32x2 = i32x2::new(0, 2); - let b: i32x2 = i32x2::new(1, 3); - let e: [i32; 4] = [0, 1, 2, 3]; - let r: [i32; 4] = transmute(vzip_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u32() { - let a: u32x2 = u32x2::new(0, 2); - let b: u32x2 = u32x2::new(1, 3); - let e: [u32; 4] = [0, 1, 2, 3]; - let r: [u32; 4] = transmute(vzip_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vzip_f32() { let a: f32x2 = f32x2::new(1., 2.); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 79cd0e4292..087d093557 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -335,7 +335,7 @@ a = 1 b = 2 validate 0 -aarch64 = cmeq +aarch64 = cmp generate i64:u64, u64 /// Floating-point compare equal @@ -345,7 +345,7 @@ a = 1. b = 2. 
validate 0 -aarch64 = fcmeq +aarch64 = fcmp generate f32:u32, f64:u64 /// Signed compare bitwise equal to zero @@ -384,7 +384,7 @@ multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}} a = 1 validate 0 -aarch64 = cmeq +aarch64 = cmp generate i64:u64, u64 /// Floating-point compare bitwise equal to zero @@ -393,7 +393,7 @@ multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = 1. validate 0 -aarch64 = fcmeq +aarch64 = fcmp generate f32:u32, f64:u64 /// Signed compare bitwise Test bits nonzero @@ -435,7 +435,7 @@ a = 0 b = 0 validate 0 -aarch64 = cmtst +aarch64 = tst generate i64:i64:u64, u64 /// Signed saturating accumulate of unsigned value @@ -524,10 +524,8 @@ a = 1 b = 2 validate 0 -aarch64 = cmgt -generate i64:u64 -aarch64 = cmhi -generate u64 +aarch64 = cmp +generate i64:u64, u64 /// Floating-point compare greater than name = vcgt @@ -536,7 +534,7 @@ a = 1. b = 2. validate 0 -aarch64 = fcmgt +aarch64 = fcmp generate f32:u32, f64:u64 //////////////////// @@ -588,10 +586,8 @@ a = 2 b = 1 validate 0 -aarch64 = cmgt -generate i64:u64 -aarch64 = cmhi -generate u64 +aarch64 = cmp +generate i64:u64, u64 /// Floating-point compare less than name = vclt @@ -600,7 +596,7 @@ a = 2. b = 1. validate 0 -aarch64 = fcmgt +aarch64 = fcmp generate f32:u32, f64:u64 //////////////////// @@ -627,7 +623,7 @@ a = 1 b = 2 validate 0 -aarch64 = cmge +aarch64 = cmp generate i64:u64, u64 /// Floating-point compare greater than or equal @@ -637,7 +633,7 @@ a = 1. b = 2. validate 0 -aarch64 = fcmge +aarch64 = fcmp generate f32:u32, f64:u64 /// Compare unsigned less than or equal @@ -672,10 +668,8 @@ a = 2 b = 1 validate 0 -aarch64 = cmge -generate i64:u64 -aaech64 = cmhs -generate u64 +aarch64 = cmp +generate i64:u64, u64 /// Floating-point compare less than or equal name = vcle @@ -684,7 +678,7 @@ a = 2. b = 1. validate 0 -aarch64 = fcmge +aarch64 = fcmp generate f32:u32, f64:u64 //////////////////// @@ -756,7 +750,7 @@ multi_fn = transmute, {vcgez-in_ntt-noext, {transmute, a}} a = -1 validate 0 -aarch64 = cmge +aarch64 = eor generate i64:u64 /// Floating-point compare greater than or equal to zero @@ -765,7 +759,7 @@ multi_fn = simd_extract, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = -1. validate 0 -aarch64 = fcmge +aarch64 = fcmp generate f32:u32, f64:u64 /// Compare signed greater than zero @@ -794,7 +788,7 @@ multi_fn = transmute, {vcgtz-in_ntt-noext, {transmute, a}} a = -1 validate 0 -aarch64 = cmgt +aarch64 = cmp generate i64:u64 /// Floating-point compare greater than zero @@ -803,7 +797,7 @@ multi_fn = simd_extract, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = -1. validate 0 -aarch64 = fcmgt +aarch64 = fcmp generate f32:u32, f64:u64 /// Compare signed less than or equal to zero @@ -832,7 +826,7 @@ multi_fn = transmute, {vclez-in_ntt-noext, {transmute, a}} a = 2 validate 0 -aarch64 = cmgt +aarch64 = cmp generate i64:u64 /// Floating-point compare less than or equal to zero @@ -841,7 +835,7 @@ multi_fn = simd_extract, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0 a = 2. validate 0 -aarch64 = fcmle +aarch64 = fcmp generate f32:u32, f64:u64 /// Compare signed less than zero @@ -880,7 +874,9 @@ a = 2. validate 0 aarch64 = fcmlt -generate f32:u32, f64:u64 +generate f32:u32 +aarch64 = asr +generate f64:u64 /// Count leading sign bits name = vcls @@ -4096,7 +4092,7 @@ name = vaddv a = 1., 2., 0., 0. validate 3. 
-aarch64 = faddv +aarch64 = faddp link-aarch64 = faddv._EXT2_._EXT_ generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64 @@ -4701,7 +4697,7 @@ name = vmaxnmv a = 1., 2., 0., 1. validate 2. -aarch64 = fmaxnmv +aarch64 = fmaxnmp link-aarch64 = fmaxnmv._EXT2_._EXT_ generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64 @@ -4803,7 +4799,7 @@ name = vminnmv a = 1., 0., 2., 3. validate 0. -aarch64 = fminnmv +aarch64 = fminnmp link-aarch64 = fminnmv._EXT2_._EXT_ generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64 @@ -4833,10 +4829,10 @@ multi_fn = vmovl-noqself-noext, a a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10 validate 3, 4, 5, 6, 7, 8, 9, 10 -aarch64 = sshll2 +aarch64 = sxtl2 generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t -aarch64 = ushll2 +aarch64 = uxtl2 generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t /// Floating-point add pairwise @@ -5161,9 +5157,10 @@ c = 2, 1, 1, 1, 1, 1, 1, 1 n = 0 validate 5 -aarch64 = sqdmull +aarch64 = sqdmlal generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32 name = vqdmlals_lane +aarch64 = sqdmull generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64 /// Signed saturating doubling multiply-subtract long @@ -5272,9 +5269,10 @@ c = 2, 1, 1, 1, 1, 1, 1, 1 n = 0 validate 6 -aarch64 = sqdmull +aarch64 = sqdmlsl generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32 name = vqdmlsls_lane +aarch64 = sqdmull generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64 /// Signed saturating doubling multiply returning high half @@ -6951,10 +6949,16 @@ validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2 aarch64 = zip arm = vzip -generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t -generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t -generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t +generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t +generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t +generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t +arm = vtrn generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t +aarch64 = ext +arm = vorr +generate int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t +generate uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t +generate poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t /// Zip vectors name = vzip @@ -6966,8 +6970,11 @@ b = 5., 6., 7., 8. validate 1., 5., 2., 6., 3., 7., 4., 8. 
aarch64 = zip -arm = vzip -generate float32x2_t:float32x2_t:float32x2x2_t, float32x4_t:float32x4_t:float32x4x2_t +arm = vtrn +generate float32x2_t:float32x2_t:float32x2x2_t +aarch64 = ext +arm = vorr +generate float32x4_t:float32x4_t:float32x4x2_t /// Zip vectors name = vzip1 @@ -7019,7 +7026,7 @@ b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32 validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32 aarch64 = uzp -arm = vzup +arm = vuzp generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t @@ -7040,7 +7047,7 @@ aarch64 = zip arm = vtrn generate float32x2_t:float32x2_t:float32x2x2_t aarch64 = uzp -arm = vzup +arm = vuzp generate float32x4_t:float32x4_t:float32x4x2_t /// Unzip vectors