Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements f64x2.convert_low_i32x4_u for x64 #2982

Merged
merged 3 commits into from
Jul 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
}

match (testsuite, testname) {
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
("simd", "simd_i16x8_extmul_i8x16") => return true,
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
Expand Down
4 changes: 2 additions & 2 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4441,10 +4441,10 @@ pub(crate) fn define(
Inst::new(
"fcvt_low_from_sint",
r#"
Converts packed signed doubleword integers to packed double precision floating point.
Converts packed signed 32-bit integers to packed double precision floating point.

Considering only the low half of the register, each lane in `x` is interpreted as a
signed doubleword integer that is then converted to a double precision float. This
signed 32-bit integer that is then converted to a double precision float. This
instruction differs from fcvt_from_sint in that it converts half the number of lanes
which are converted to occupy twice the number of bits. No rounding should be needed
for the resulting float.
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ pub enum SseOpcode {
Subsd,
Ucomiss,
Ucomisd,
Unpcklps,
Xorps,
Xorpd,
}
Expand Down Expand Up @@ -675,6 +676,7 @@ impl SseOpcode {
| SseOpcode::Subps
| SseOpcode::Subss
| SseOpcode::Ucomiss
| SseOpcode::Unpcklps
| SseOpcode::Xorps => SSE,

SseOpcode::Addpd
Expand Down Expand Up @@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Subsd => "subsd",
SseOpcode::Ucomiss => "ucomiss",
SseOpcode::Ucomisd => "ucomisd",
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd",
};
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,7 @@ pub(crate) fn emit(
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
Expand Down
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3717,6 +3717,12 @@ fn test_x64_emit() {
"punpcklbw %xmm1, %xmm8",
));

insns.push((
Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2),
"410F14D3",
"unpcklps %xmm11, %xmm2",
));

// ========================================================
// XMM_RM_R: Integer Conversion
insns.push((
Expand Down
66 changes: 65 additions & 1 deletion cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4201,6 +4201,67 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
_ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
};
} else if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
let uwiden_input = InsnInput {
insn: uwiden,
input: 0,
};
let src = put_input_in_reg(ctx, uwiden_input);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let input_ty = ctx.input_ty(uwiden, 0);
let output_ty = ctx.output_ty(insn, 0);

// Matches_input further obfuscates which Wasm instruction this is ultimately
// lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
debug_assert!(input_ty == types::I32X4 || output_ty == types::F64X2);

// Algorithm uses unpcklps to help create a float that is equivalent
// 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
// every value of the mantissa represents a corresponding uint32 number.
// When we subtract 0x1.0p52 we are left with double(src).
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, src, types::I32X4));

static UINT_MASK: [u8; 16] = [
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00,
];

let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));

ctx.emit(Inst::xmm_load_const(
uint_mask_const,
uint_mask,
types::I32X4,
));

// Creates 0x1.0p52 + double(src)
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Unpcklps,
RegMem::from(uint_mask),
dst,
));

static UINT_MASK_HIGH: [u8; 16] = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x30, 0x43,
];

let uint_mask_high_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
uint_mask_high_const,
uint_mask_high,
types::I32X4,
));

// 0x1.0p52 + double(src) - 0x1.0p52
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Subpd,
RegMem::from(uint_mask_high),
dst,
));
} else {
assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
let src = put_input_in_reg(ctx, inputs[0]);
Expand Down Expand Up @@ -4543,7 +4604,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(types::I16X8, types::I32X4) => {
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst));
}
_ => unreachable!(),
_ => unreachable!(
"In UwidenLow: input_ty {:?}, output_ty {:?}",
input_ty, output_ty
),
},
Opcode::UwidenHigh => match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
Expand Down
6 changes: 3 additions & 3 deletions cranelift/codegen/src/isa/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ use self::inst::EmitInfo;

use super::TargetIsa;
use crate::ir::{condcodes::IntCC, Function};
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};

use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple;

#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;

mod abi;
pub mod encoding;
mod inst;
Expand Down
8 changes: 6 additions & 2 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
}
Operator::F64x2ConvertLowI32x4U => {
let a = pop1_with_bitcast(state, I32X4, builder);
let widened_a = builder.ins().uwiden_low(a);
state.push1(builder.ins().fcvt_from_uint(F64X2, widened_a));
}
Operator::F64x2PromoteLowF32x4 => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fvpromote_low(a));
Expand Down Expand Up @@ -1921,8 +1926,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8ExtAddPairwiseI8x16S
| Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U
| Operator::F64x2ConvertLowI32x4U => {
| Operator::I32x4ExtAddPairwiseI16x8U => {
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
}
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
Expand Down