bytecodealliance · abrown · Jul 15, 2020 · Jul 7, 2020 · Jul 7, 2020 · Jul 7, 2020
@@ -202,8 +202,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             // to be a big chunk of work to implement them all there!
             ("simd", _) if target.contains("aarch64") => return true,
 
-            ("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
-
             // TODO(#1886): Ignore reference types tests if this isn't x64,
             // because Cranelift only supports reference types on x64.
             ("reference_types", _) => {

@@ -211,6 +211,24 @@ impl TypeVar {
                     "can't double 256 lanes"
                 );
             }
+            DerivedFunc::MergeLanes => {
+                assert!(
+                    ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
+                    "can't double all integer types"
+                );
+                assert!(
+                    ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
+                    "can't double all float types"
+                );
+                assert!(
+                    ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
+                    "can't double all boolean types"
+                );
+                assert!(
+                    *ts.lanes.iter().min().unwrap() > 1,
+                    "can't halve a scalar type"
+                );
+            }
             DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
         }
 
@@ -248,6 +266,9 @@ impl TypeVar {
     pub fn split_lanes(&self) -> TypeVar {
         self.derived(DerivedFunc::SplitLanes)
     }
+    pub fn merge_lanes(&self) -> TypeVar {
+        self.derived(DerivedFunc::MergeLanes)
+    }
 
     /// Constrain the range of types this variable can assume to a subset of those in the typeset
     /// ts.
@@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc {
     HalfVector,
     DoubleVector,
     SplitLanes,
+    MergeLanes,
 }
 
 impl DerivedFunc {
@@ -367,6 +389,7 @@ impl DerivedFunc {
             DerivedFunc::HalfVector => "half_vector",
             DerivedFunc::DoubleVector => "double_vector",
             DerivedFunc::SplitLanes => "split_lanes",
+            DerivedFunc::MergeLanes => "merge_lanes",
         }
     }
 
@@ -377,6 +400,8 @@ impl DerivedFunc {
             DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
             DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
             DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
+            DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
+            DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
             _ => None,
         }
     }
@@ -462,6 +487,7 @@ impl TypeSet {
             DerivedFunc::HalfVector => self.half_vector(),
             DerivedFunc::DoubleVector => self.double_vector(),
             DerivedFunc::SplitLanes => self.half_width().double_vector(),
+            DerivedFunc::MergeLanes => self.double_width().half_vector(),
         }
     }
 
@@ -601,7 +627,8 @@ impl TypeSet {
             DerivedFunc::DoubleWidth => self.half_width(),
             DerivedFunc::HalfVector => self.double_vector(),
             DerivedFunc::DoubleVector => self.half_vector(),
-            DerivedFunc::SplitLanes => self.half_vector().double_width(),
+            DerivedFunc::SplitLanes => self.double_width().half_vector(),
+            DerivedFunc::MergeLanes => self.half_width().double_vector(),
         }
     }
 

@@ -1669,6 +1669,7 @@ fn define_simd(
     let ssub_sat = shared.by_name("ssub_sat");
     let store = shared.by_name("store");
     let store_complex = shared.by_name("store_complex");
+    let swiden_low = shared.by_name("swiden_low");
     let uadd_sat = shared.by_name("uadd_sat");
     let uload8x8 = shared.by_name("uload8x8");
     let uload8x8_complex = shared.by_name("uload8x8_complex");
@@ -1678,6 +1679,7 @@ fn define_simd(
     let uload32x2_complex = shared.by_name("uload32x2_complex");
     let snarrow = shared.by_name("snarrow");
     let unarrow = shared.by_name("unarrow");
+    let uwiden_low = shared.by_name("uwiden_low");
     let ushr_imm = shared.by_name("ushr_imm");
     let usub_sat = shared.by_name("usub_sat");
     let vconst = shared.by_name("vconst");
@@ -1697,6 +1699,7 @@ fn define_simd(
     let x86_pminu = x86.by_name("x86_pminu");
     let x86_pmullq = x86.by_name("x86_pmullq");
     let x86_pmuludq = x86.by_name("x86_pmuludq");
+    let x86_palignr = x86.by_name("x86_palignr");
     let x86_pshufb = x86.by_name("x86_pshufb");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_psll = x86.by_name("x86_psll");
@@ -1901,6 +1904,8 @@ fn define_simd(
             rec_fa.opcodes(low),
         );
     }
+
+    // SIMD narrow/widen
     for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
         let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
         e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
@@ -1912,6 +1917,23 @@ fn define_simd(
         let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
         e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
     }
+    for (ty, swiden_opcode, uwiden_opcode) in &[
+        (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
+        (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
+    ] {
+        let isap = Some(use_sse41_simd);
+        let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
+        let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
+    }
+    for ty in &[I8, I16, I32, I64] {
+        e.enc_both_inferred_maybe_isap(
+            x86_palignr.bind(vector(*ty, sse_vector_size)),
+            rec_fa_ib.opcodes(&PALIGNR[..]),
+            Some(use_ssse3_simd),
+        );
+    }
 
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
     for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {

@@ -664,6 +664,21 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let c = &Operand::new("c", uimm8)
+        .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
+    ig.push(
+        Inst::new(
+            "x86_palignr",
+            r#"
+        Concatenate destination and source operands, extracting a byte-aligned result shifted to 
+        the right by `c`.
+        "#,
+            &formats.ternary_imm8,
+        )
+        .operands_in(vec![x, y, c])
+        .operands_out(vec![a]),
+    );
+
     let i64_t = &TypeVar::new(
         "i64_t",
         "A scalar 64bit integer",

@@ -407,13 +407,18 @@ fn define_simd(
     let umax = insts.by_name("umax");
     let umin = insts.by_name("umin");
     let snarrow = insts.by_name("snarrow");
+    let swiden_high = insts.by_name("swiden_high");
+    let swiden_low = insts.by_name("swiden_low");
     let ushr_imm = insts.by_name("ushr_imm");
     let ushr = insts.by_name("ushr");
+    let uwiden_high = insts.by_name("uwiden_high");
+    let uwiden_low = insts.by_name("uwiden_low");
     let vconst = insts.by_name("vconst");
     let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
     let vselect = insts.by_name("vselect");
 
+    let x86_palignr = x86_instructions.by_name("x86_palignr");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
     let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
     let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -786,6 +791,26 @@ fn define_simd(
         );
     }
 
+    // SIMD widen
+    for ty in &[I8, I16] {
+        let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = swiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = swiden_low(c)),
+            ],
+        );
+        let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = uwiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = uwiden_low(c)),
+            ],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");

@@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
 /// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
 pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
 
+/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
+/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
+pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
+
 /// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
 pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
 
@@ -473,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
 pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
 
 /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
 
 /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
@@ -485,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
 pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
 
 /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
 
 /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of

@@ -3883,19 +3883,19 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
     );
 
-    let I16xN = &TypeVar::new(
-        "I16xN",
-        "A SIMD vector type containing integers 16-bits wide and up",
+    let I16or32xN = &TypeVar::new(
+        "I16or32xN",
+        "A SIMD vector type containing integer lanes 16 or 32 bits wide",
         TypeSetBuilder::new()
             .ints(16..32)
             .simd_lanes(4..8)
             .includes_scalars(false)
             .build(),
     );
 
-    let x = &Operand::new("x", I16xN);
-    let y = &Operand::new("y", I16xN);
-    let a = &Operand::new("a", &I16xN.split_lanes());
+    let x = &Operand::new("x", I16or32xN);
+    let y = &Operand::new("y", I16or32xN);
+    let a = &Operand::new("a", &I16or32xN.split_lanes());
 
     ig.push(
         Inst::new(
@@ -3934,6 +3934,75 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let I8or16xN = &TypeVar::new(
+        "I8or16xN",
+        "A SIMD vector type containing integer lanes 8 or 16 bits wide.",
+        TypeSetBuilder::new()
+            .ints(8..16)
+            .simd_lanes(8..16)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", I8or16xN);
+    let a = &Operand::new("a", &I8or16xN.merge_lanes());
+
+    ig.push(
+        Inst::new(
+            "swiden_low",
+            r#"
+        Widen the low lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "swiden_high",
+            r#"
+        Widen the high lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_low",
+            r#"
+        Widen the low lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_high",
+            r#"
+        Widen the high lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",

@@ -584,6 +584,9 @@ enum OperandConstraint {
 
     /// This operand is `ctrlType.split_lanes()`.
     SplitLanes,
+
+    /// This operand is `ctrlType.merge_lanes()`.
+    MergeLanes,
 }
 
 impl OperandConstraint {
@@ -615,6 +618,11 @@ impl OperandConstraint {
                     .split_lanes()
                     .expect("invalid type for split_lanes"),
             ),
+            MergeLanes => Bound(
+                ctrl_type
+                    .merge_lanes()
+                    .expect("invalid type for merge_lanes"),
+            ),
         }
     }
 }

@@ -284,14 +284,25 @@ impl Type {
 
     /// Split the lane width in half and double the number of lanes to maintain the same bit-width.
     ///
-    /// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2.
+    /// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`.
     pub fn split_lanes(self) -> Option<Self> {
         match self.half_width() {
             Some(half_width) => half_width.by(2),
             None => None,
         }
     }
 
+    /// Merge lanes to halve the number of lanes and double the lane width to maintain the same
+    /// bit-width.
+    ///
+    /// If this is a scalar type, it will return `None`.
+    pub fn merge_lanes(self) -> Option<Self> {
+        match self.double_width() {
+            Some(double_width) => double_width.half_vector(),
+            None => None,
+        }
+    }
+
     /// Index of this type, for use with hash tables etc.
     pub fn index(self) -> usize {
         usize::from(self.0)