Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Infer REX prefix for SIMD store and vconst instructions #1388

Merged
merged 2 commits into from
Feb 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1795,14 +1795,14 @@ fn define_simd(

let is_zero_128bit =
InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
let template = rec_vconst_optimized.nonrex().opcodes(&PXOR);
let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
e.enc_32_64_func(instruction.clone(), template, |builder| {
builder.inst_predicate(is_zero_128bit)
});

let is_ones_128bit =
InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
let template = rec_vconst_optimized.nonrex().opcodes(&PCMPEQB);
let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
e.enc_32_64_func(instruction, template, |builder| {
builder.inst_predicate(is_ones_128bit)
});
Expand All @@ -1816,7 +1816,7 @@ fn define_simd(
// in memory) but some performance measurements are needed.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let instruction = vconst.bind(vector(ty, sse_vector_size));
let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD);
let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex();
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
}

Expand All @@ -1826,13 +1826,19 @@ fn define_simd(
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
// Store
let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
e.enc_32_64(
bound_store.clone(),
rec_fst.opcodes(&MOVUPS_STORE).infer_rex(),
);
e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

// Load
let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
e.enc_32_64(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
e.enc_32_64(
bound_load.clone(),
rec_fld.opcodes(&MOVUPS_LOAD).infer_rex(),
);
e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));

Expand Down
22 changes: 18 additions & 4 deletions cranelift-codegen/meta/src/isa/x86/recipes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ impl<'builder> RecipeGroup<'builder> {
self.templates.push(template.clone());
template
}
/// Registers a template whose size is computed by a REX-inferring size function.
///
/// The recipe is wrapped in a `Template` built with this group's `regs`, and
/// `inferred_rex_compute_size(infer_function)` is applied to it. A shared handle to the result
/// is appended to `self.templates` and a second handle is returned to the caller.
///
/// `infer_function` is passed through unchanged; callers in this file supply names such as
/// `"size_with_inferred_rex_for_outreg0"`.
fn add_template_inferred(
    &mut self,
    recipe: EncodingRecipeBuilder,
    infer_function: &'static str,
) -> Rc<Template<'builder>> {
    let inferred = Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function);
    let shared = Rc::new(inferred);
    // Keep one handle in the group's list; hand the other back to the caller.
    self.templates.push(Rc::clone(&shared));
    shared
}
fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
let template = Rc::new(template);
self.templates.push(template.clone());
Expand Down Expand Up @@ -1481,7 +1491,7 @@ pub(crate) fn define<'shared>(
);

// XX /r register-indirect store of FPR with no offset.
recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("fst", &formats.store, 1)
.operands_in(vec![fpr, gpr])
.inst_predicate(has_no_offset)
Expand All @@ -1504,6 +1514,7 @@ pub(crate) fn define<'shared>(
}
"#,
),
"size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
);

let has_small_offset =
Expand Down Expand Up @@ -1991,7 +2002,7 @@ pub(crate) fn define<'shared>(
);

// XX /r float load with no offset.
recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("fld", &formats.load, 1)
.operands_in(vec![gpr])
.operands_out(vec![fpr])
Expand All @@ -2015,6 +2026,7 @@ pub(crate) fn define<'shared>(
}
"#,
),
"size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
);

let has_small_offset =
Expand Down Expand Up @@ -2515,7 +2527,7 @@ pub(crate) fn define<'shared>(
),
);

recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
.operands_out(vec![fpr])
.clobbers_flags(false)
Expand All @@ -2526,9 +2538,10 @@ pub(crate) fn define<'shared>(
const_disp4(constant_handle, func, sink);
"#,
),
"size_with_inferred_rex_for_outreg0",
);

recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
.operands_out(vec![fpr])
.clobbers_flags(false)
Expand All @@ -2538,6 +2551,7 @@ pub(crate) fn define<'shared>(
modrm_rr(out_reg0, out_reg0, sink);
"#,
),
"size_with_inferred_rex_for_outreg0",
);

recipes.add_template_recipe(
Expand Down
46 changes: 46 additions & 0 deletions cranelift-codegen/src/isa/x86/enc_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,39 @@ fn size_plus_maybe_sib_or_offset_for_inreg_1(
sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
}

/// Computes the size of an encoding whose REX prefix is emitted only when needed.
///
/// One byte is added for the REX prefix when the encoding bits have a non-zero `rex_w` field, or
/// when either of the first two input registers (inreg0, inreg1) satisfies `is_extended_reg`.
/// The rest of the size, including any SIB byte or offset needed for the second input register
/// (inreg1), is delegated to `size_plus_maybe_sib_or_offset_for_inreg_1`.
fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
    sizing: &RecipeSizing,
    enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
) -> u8 {
    // The register checks are only consulted when REX.W does not already force the prefix.
    let rex_len = if EncodingBits::from(enc.bits()).rex_w() != 0
        || test_input(0, inst, divert, func, is_extended_reg)
        || test_input(1, inst, divert, func, is_extended_reg)
    {
        1
    } else {
        0
    };
    let len_without_rex =
        size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func);
    len_without_rex + rex_len
}

/// Computes the size of an encoding whose REX prefix is emitted only when needed.
///
/// One byte is added for the REX prefix when the encoding bits have a non-zero `rex_w` field, or
/// when the first input register (inreg0) or the first output register (outreg0) satisfies
/// `is_extended_reg`. The rest of the size, including any SIB byte or offset needed for the
/// first input register (inreg0), is delegated to `size_plus_maybe_sib_or_offset_for_inreg_0`.
fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
    sizing: &RecipeSizing,
    enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
) -> u8 {
    let rex_w_requested = EncodingBits::from(enc.bits()).rex_w() != 0;
    // Evaluated lazily so the operands are inspected only when REX.W is not set.
    let touches_extended_reg = || {
        test_input(0, inst, divert, func, is_extended_reg)
            || test_result(0, inst, divert, func, is_extended_reg)
    };
    let rex_len = if rex_w_requested || touches_extended_reg() {
        1
    } else {
        0
    };
    let len_without_rex =
        size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func);
    len_without_rex + rex_len
}

/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
///
/// A REX prefix is known to be emitted if either:
Expand Down Expand Up @@ -199,6 +232,19 @@ fn size_with_inferred_rex_for_inreg0_outreg0(
sizing.base_size + if needs_rex { 1 } else { 0 }
}

/// Computes the encoding size, inferring a dynamic REX prefix from a single output register.
///
/// The result is `sizing.base_size`, plus one byte when the encoding bits have a non-zero
/// `rex_w` field or the first output register (outreg0) satisfies `is_extended_reg`.
fn size_with_inferred_rex_for_outreg0(
    sizing: &RecipeSizing,
    enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
) -> u8 {
    let rex_w_requested = EncodingBits::from(enc.bits()).rex_w() != 0;
    // The output register is inspected only when REX.W does not already force the prefix.
    if rex_w_requested || test_result(0, inst, divert, func, is_extended_reg) {
        sizing.base_size + 1
    } else {
        sizing.base_size
    }
}

/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
///
/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2.
Expand Down
11 changes: 11 additions & 0 deletions filetests/isa/x86/binary64.clif
Original file line number Diff line number Diff line change
Expand Up @@ -1679,3 +1679,14 @@ block0:
[-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001
return
}

; Binary-encoding checks for a 128-bit vector constant, store and load whose operands are
; assigned to %r10, %r11 and %xmm9. The expected encodings below start with a byte in the
; REX prefix range (0x40-0x4f).
function %V128() {
block0:
[-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
[-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33)
store v4, v3 ; bin: heap_oob 45 0f 11 0a

; v5 only supplies the address for the load below; it carries no `bin:` check of its own.
[-, %r11] v5 = iconst.i64 0x1234
[-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13
return
}