bytecodealliance · jlb6740 · Jul 9, 2021 · Jun 6, 2021 · Jun 14, 2021 · Jul 8, 2021
@@ -189,7 +189,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
     }
 
     match (testsuite, testname) {
-        ("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
         ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
         ("simd", "simd_i16x8_extmul_i8x16") => return true,
         ("simd", "simd_i16x8_q15mulr_sat_s") => return true,

@@ -4441,10 +4441,10 @@ pub(crate) fn define(
         Inst::new(
             "fcvt_low_from_sint",
             r#"
-        Converts packed signed doubleword integers to packed double precision floating point.
+        Converts packed signed 32-bit integers to packed double precision floating point.
 
         Considering only the low half of the register, each lane in `x` is interpreted as a
-        signed doubleword integer that is then converted to a double precision float. This
+        signed 32-bit integer that is then converted to a double precision float. This
         instruction differs from fcvt_from_sint in that it converts half the number of lanes
         which are converted to occupy twice the number of bits. No rounding should be needed
         for the resulting float.

@@ -635,6 +635,7 @@ pub enum SseOpcode {
     Subsd,
     Ucomiss,
     Ucomisd,
+    Unpcklps,
     Xorps,
     Xorpd,
 }
@@ -675,6 +676,7 @@ impl SseOpcode {
             | SseOpcode::Subps
             | SseOpcode::Subss
             | SseOpcode::Ucomiss
+            | SseOpcode::Unpcklps
             | SseOpcode::Xorps => SSE,
 
             SseOpcode::Addpd
@@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Subsd => "subsd",
             SseOpcode::Ucomiss => "ucomiss",
             SseOpcode::Ucomisd => "ucomisd",
+            SseOpcode::Unpcklps => "unpcklps",
             SseOpcode::Xorps => "xorps",
             SseOpcode::Xorpd => "xorpd",
         };

@@ -1529,6 +1529,7 @@ pub(crate) fn emit(
                 SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
                 SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
                 SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
+                SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
                 SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
                 SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
                 _ => unimplemented!("Opcode {:?} not implemented", op),

@@ -3717,6 +3717,12 @@ fn test_x64_emit() {
         "punpcklbw %xmm1, %xmm8",
     ));
 
+    insns.push((
+        Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2),
+        "410F14D3",
+        "unpcklps %xmm11, %xmm2",
+    ));
+
     // ========================================================
     // XMM_RM_R: Integer Conversion
     insns.push((

@@ -4201,6 +4201,67 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     }
                     _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
                 };
+            } else if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
+                let uwiden_input = InsnInput {
+                    insn: uwiden,
+                    input: 0,
+                };
+                let src = put_input_in_reg(ctx, uwiden_input);
+                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                let input_ty = ctx.input_ty(uwiden, 0);
+                let output_ty = ctx.output_ty(insn, 0);
+
+                // `matches_input` hides which Wasm instruction is ultimately being lowered, so
+                // check here that the types are as expected for F64x2ConvertLowI32x4U.
+                // NOTE(review): the assert joins both checks with `||`; `&&` looks intended.
+                debug_assert!(input_ty == types::I32X4 || output_ty == types::F64X2);
+
+                // The algorithm uses unpcklps to build, in each 64-bit lane, a double equal to
+                // 0x1.0p52 + double(src). 0x1.0p52 is special because at this exponent one unit
+                // in the mantissa equals 1, so the low 32 mantissa bits hold the uint32 value
+                // directly. Subtracting 0x1.0p52 then leaves double(src).
+                let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
+                ctx.emit(Inst::gen_move(dst, src, types::I32X4));
+
+                static UINT_MASK: [u8; 16] = [
+                    0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
+                    0x00, 0x00, 0x00,
+                ];
+
+                let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
+
+                ctx.emit(Inst::xmm_load_const(
+                    uint_mask_const,
+                    uint_mask,
+                    types::I32X4,
+                ));
+
+                // Creates 0x1.0p52 + double(src)
+                ctx.emit(Inst::xmm_rm_r(
+                    SseOpcode::Unpcklps,
+                    RegMem::from(uint_mask),
+                    dst,
+                ));
+
+                static UINT_MASK_HIGH: [u8; 16] = [
+                    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
+                    0x00, 0x30, 0x43,
+                ];
+
+                let uint_mask_high_const =
+                    ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
+                let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
+                ctx.emit(Inst::xmm_load_const(
+                    uint_mask_high_const,
+                    uint_mask_high,
+                    types::I32X4,
+                ));
+
+                // 0x1.0p52 + double(src) - 0x1.0p52
+                ctx.emit(Inst::xmm_rm_r(
+                    SseOpcode::Subpd,
+                    RegMem::from(uint_mask_high),
+                    dst,
+                ));
             } else {
                 assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
                 let src = put_input_in_reg(ctx, inputs[0]);
@@ -4543,7 +4604,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         (types::I16X8, types::I32X4) => {
                             ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst));
                         }
-                        _ => unreachable!(),
+                        _ => unreachable!(
+                            "In UwidenLow: input_ty {:?}, output_ty {:?}",
+                            input_ty, output_ty
+                        ),
                     },
                     Opcode::UwidenHigh => match (input_ty, output_ty) {
                         (types::I8X16, types::I16X8) => {

@@ -4,19 +4,19 @@ use self::inst::EmitInfo;
 
 use super::TargetIsa;
 use crate::ir::{condcodes::IntCC, Function};
+#[cfg(feature = "unwind")]
+use crate::isa::unwind::systemv;
 use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
 use crate::isa::Builder as IsaBuilder;
 use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
 use crate::result::CodegenResult;
 use crate::settings::{self as shared_settings, Flags};
 use alloc::{boxed::Box, vec::Vec};
 use core::hash::{Hash, Hasher};
+
 use regalloc::{PrettyPrint, RealRegUniverse, Reg};
 use target_lexicon::Triple;
 
-#[cfg(feature = "unwind")]
-use crate::isa::unwind::systemv;
-
 mod abi;
 pub mod encoding;
 mod inst;

@@ -1778,6 +1778,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = pop1_with_bitcast(state, I32X4, builder);
             state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
         }
+        Operator::F64x2ConvertLowI32x4U => {
+            let a = pop1_with_bitcast(state, I32X4, builder);
+            let widened_a = builder.ins().uwiden_low(a);
+            state.push1(builder.ins().fcvt_from_uint(F64X2, widened_a));
+        }
         Operator::F64x2PromoteLowF32x4 => {
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fvpromote_low(a));
@@ -1921,8 +1926,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         | Operator::I16x8ExtAddPairwiseI8x16S
         | Operator::I16x8ExtAddPairwiseI8x16U
         | Operator::I32x4ExtAddPairwiseI16x8S
-        | Operator::I32x4ExtAddPairwiseI16x8U
-        | Operator::F64x2ConvertLowI32x4U => {
+        | Operator::I32x4ExtAddPairwiseI16x8U => {
             return Err(wasm_unsupported!("proposed simd operator {:?}", op));
         }
         Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {