Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement SIMD widening instructions for x86 #1994

Merged
merged 6 commits into from
Jul 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true,

("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S

// TODO(#1886): Ignore reference types tests if this isn't x64,
// because Cranelift only supports reference types on x64.
("reference_types", _) => {
Expand Down
29 changes: 28 additions & 1 deletion cranelift/codegen/meta/src/cdsl/typevar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,24 @@ impl TypeVar {
"can't double 256 lanes"
);
}
DerivedFunc::MergeLanes => {
assert!(
ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
"can't double all integer types"
);
assert!(
ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
"can't double all float types"
);
assert!(
ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
"can't double all boolean types"
);
assert!(
*ts.lanes.iter().min().unwrap() > 1,
"can't halve a scalar type"
);
}
DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
}

Expand Down Expand Up @@ -248,6 +266,9 @@ impl TypeVar {
/// Derive a new type variable from this one using `DerivedFunc::SplitLanes`
/// (half the lane width, twice the number of lanes).
pub fn split_lanes(&self) -> TypeVar {
    let func = DerivedFunc::SplitLanes;
    self.derived(func)
}
/// Derive a new type variable from this one using `DerivedFunc::MergeLanes`
/// (twice the lane width, half the number of lanes).
pub fn merge_lanes(&self) -> TypeVar {
    let func = DerivedFunc::MergeLanes;
    self.derived(func)
}

/// Constrain the range of types this variable can assume to a subset of those in the typeset
/// ts.
Expand Down Expand Up @@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc {
HalfVector,
DoubleVector,
SplitLanes,
MergeLanes,
}

impl DerivedFunc {
Expand All @@ -367,6 +389,7 @@ impl DerivedFunc {
DerivedFunc::HalfVector => "half_vector",
DerivedFunc::DoubleVector => "double_vector",
DerivedFunc::SplitLanes => "split_lanes",
DerivedFunc::MergeLanes => "merge_lanes",
}
}

Expand All @@ -377,6 +400,8 @@ impl DerivedFunc {
DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
_ => None,
}
}
Expand Down Expand Up @@ -462,6 +487,7 @@ impl TypeSet {
DerivedFunc::HalfVector => self.half_vector(),
DerivedFunc::DoubleVector => self.double_vector(),
DerivedFunc::SplitLanes => self.half_width().double_vector(),
DerivedFunc::MergeLanes => self.double_width().half_vector(),
}
}

Expand Down Expand Up @@ -601,7 +627,8 @@ impl TypeSet {
DerivedFunc::DoubleWidth => self.half_width(),
DerivedFunc::HalfVector => self.double_vector(),
DerivedFunc::DoubleVector => self.half_vector(),
DerivedFunc::SplitLanes => self.half_vector().double_width(),
DerivedFunc::SplitLanes => self.double_width().half_vector(),
DerivedFunc::MergeLanes => self.half_width().double_vector(),
}
}

Expand Down
22 changes: 22 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1669,6 +1669,7 @@ fn define_simd(
let ssub_sat = shared.by_name("ssub_sat");
let store = shared.by_name("store");
let store_complex = shared.by_name("store_complex");
let swiden_low = shared.by_name("swiden_low");
let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
Expand All @@ -1678,6 +1679,7 @@ fn define_simd(
let uload32x2_complex = shared.by_name("uload32x2_complex");
let snarrow = shared.by_name("snarrow");
let unarrow = shared.by_name("unarrow");
let uwiden_low = shared.by_name("uwiden_low");
let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
Expand All @@ -1697,6 +1699,7 @@ fn define_simd(
let x86_pminu = x86.by_name("x86_pminu");
let x86_pmullq = x86.by_name("x86_pmullq");
let x86_pmuludq = x86.by_name("x86_pmuludq");
let x86_palignr = x86.by_name("x86_palignr");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_psll = x86.by_name("x86_psll");
Expand Down Expand Up @@ -1901,6 +1904,8 @@ fn define_simd(
rec_fa.opcodes(low),
);
}

// SIMD narrow/widen
for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
Expand All @@ -1912,6 +1917,23 @@ fn define_simd(
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
}
for (ty, swiden_opcode, uwiden_opcode) in &[
(I8, &PMOVSXBW[..], &PMOVZXBW[..]),
(I16, &PMOVSXWD[..], &PMOVZXWD[..]),
] {
let isap = Some(use_sse41_simd);
let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
}
for ty in &[I8, I16, I32, I64] {
e.enc_both_inferred_maybe_isap(
x86_palignr.bind(vector(*ty, sse_vector_size)),
rec_fa_ib.opcodes(&PALIGNR[..]),
Some(use_ssse3_simd),
);
}

// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
Expand Down
15 changes: 15 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,21 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let c = &Operand::new("c", uimm8)
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
ig.push(
Inst::new(
"x86_palignr",
r#"
Concatenate destination and source operands, extracting a byte-aligned result shifted to
the right by `c`.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![x, y, c])
.operands_out(vec![a]),
);

let i64_t = &TypeVar::new(
"i64_t",
"A scalar 64bit integer",
Expand Down
25 changes: 25 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/legalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,13 +407,18 @@ fn define_simd(
let umax = insts.by_name("umax");
let umin = insts.by_name("umin");
let snarrow = insts.by_name("snarrow");
let swiden_high = insts.by_name("swiden_high");
let swiden_low = insts.by_name("swiden_low");
let ushr_imm = insts.by_name("ushr_imm");
let ushr = insts.by_name("ushr");
let uwiden_high = insts.by_name("uwiden_high");
let uwiden_low = insts.by_name("uwiden_low");
let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true");
let vselect = insts.by_name("vselect");

let x86_palignr = x86_instructions.by_name("x86_palignr");
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
let x86_pmins = x86_instructions.by_name("x86_pmins");
Expand Down Expand Up @@ -786,6 +791,26 @@ fn define_simd(
);
}

// SIMD widen
for ty in &[I8, I16] {
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = swiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = swiden_low(c)),
],
);
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = uwiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = uwiden_low(c)),
],
);
}

narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");
Expand Down
8 changes: 6 additions & 2 deletions cranelift/codegen/meta/src/isa/x86/opcodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];

/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];

/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];

Expand Down Expand Up @@ -473,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];

/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
/// integers in xmm1.
/// integers in xmm1 (SSE4.1).
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];

/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
Expand All @@ -485,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];

/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
/// integers in xmm1.
/// integers in xmm1 (SSE4.1).
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];

/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
Expand Down
81 changes: 75 additions & 6 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3883,19 +3883,19 @@ pub(crate) fn define(
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
);

let I16xN = &TypeVar::new(
"I16xN",
"A SIMD vector type containing integers 16-bits wide and up",
let I16or32xN = &TypeVar::new(
"I16or32xN",
"A SIMD vector type containing integer lanes 16 or 32 bits wide",
TypeSetBuilder::new()
.ints(16..32)
.simd_lanes(4..8)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I16xN);
let y = &Operand::new("y", I16xN);
let a = &Operand::new("a", &I16xN.split_lanes());
let x = &Operand::new("x", I16or32xN);
let y = &Operand::new("y", I16or32xN);
let a = &Operand::new("a", &I16or32xN.split_lanes());

ig.push(
Inst::new(
Expand Down Expand Up @@ -3934,6 +3934,75 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let I8or16xN = &TypeVar::new(
"I8or16xN",
"A SIMD vector type containing integer lanes 8 or 16 bits wide.",
TypeSetBuilder::new()
.ints(8..16)
.simd_lanes(8..16)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I8or16xN);
let a = &Operand::new("a", &I8or16xN.merge_lanes());

ig.push(
Inst::new(
"swiden_low",
r#"
Widen the low lanes of `x` using signed extension.

This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"swiden_high",
r#"
Widen the high lanes of `x` using signed extension.

This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"uwiden_low",
r#"
Widen the low lanes of `x` using unsigned extension.

This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"uwiden_high",
r#"
Widen the high lanes of `x` using unsigned extension.

This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let IntTo = &TypeVar::new(
"IntTo",
"A larger integer type with the same number of lanes",
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/ir/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,9 @@ enum OperandConstraint {

/// This operand is `ctrlType.split_lanes()`.
SplitLanes,

/// This operand is `ctrlType.merge_lanes()`.
MergeLanes,
}

impl OperandConstraint {
Expand Down Expand Up @@ -615,6 +618,11 @@ impl OperandConstraint {
.split_lanes()
.expect("invalid type for split_lanes"),
),
MergeLanes => Bound(
ctrl_type
.merge_lanes()
.expect("invalid type for merge_lanes"),
),
}
}
}
Expand Down
13 changes: 12 additions & 1 deletion cranelift/codegen/src/ir/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,14 +284,25 @@ impl Type {

/// Split the lane width in half and double the number of lanes to maintain the same bit-width.
///
/// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2.
/// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`.
pub fn split_lanes(self) -> Option<Self> {
    // Propagate `None` when the lane width cannot be halved; otherwise the result is
    // whatever `by(2)` yields for the narrowed type.
    let narrowed = self.half_width()?;
    narrowed.by(2)
}

/// Halve the number of lanes and double the lane width, keeping the overall bit-width
/// unchanged.
///
/// Returns `None` when the lane width cannot be doubled or the lane count cannot be halved;
/// in particular, a scalar type yields `None`.
pub fn merge_lanes(self) -> Option<Self> {
    // `?` short-circuits with `None` if there is no double-width counterpart.
    let widened = self.double_width()?;
    widened.half_vector()
}

/// Index of this type, for use with hash tables etc.
pub fn index(self) -> usize {
usize::from(self.0)
Expand Down
Loading