diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 4c61954630a1..680d0921ff9f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -397,6 +397,14 @@ impl fmt::Display for UnaryRmROpcode { } } +#[derive(Clone, Copy, PartialEq)] +pub enum CmpOpcode { + /// CMP instruction: compute `a - b` and set flags from result. + Cmp, + /// TEST instruction: compute `a & b` and set flags from result. + Test, +} + pub(crate) enum InstructionSet { SSE, SSE2, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index fb32635a9200..0bd8af840f7c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1311,7 +1311,13 @@ pub(crate) fn emit( size, src: src_e, dst: reg_g, + opcode, } => { + let is_cmp = match opcode { + CmpOpcode::Cmp => true, + CmpOpcode::Test => false, + }; + let mut prefix = LegacyPrefixes::None; if *size == 2 { prefix = LegacyPrefixes::_66; @@ -1342,16 +1348,26 @@ pub(crate) fn emit( } } - // Use the swapped operands encoding, to stay consistent with the output of + // Use the swapped operands encoding for CMP, to stay consistent with the output of // gcc/llvm. - let opcode = if *size == 1 { 0x38 } else { 0x39 }; + let opcode = match (*size, is_cmp) { + (1, true) => 0x38, + (_, true) => 0x39, + (1, false) => 0x84, + (_, false) => 0x85, + }; emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex); } RegMemImm::Mem { addr } => { let addr = &addr.finalize(state, sink); - // Whereas here we revert to the "normal" G-E ordering. - let opcode = if *size == 1 { 0x3A } else { 0x3B }; + // Whereas here we revert to the "normal" G-E ordering for CMP. + let opcode = match (*size, is_cmp) { + (1, true) => 0x3A, + (_, true) => 0x3B, + (1, false) => 0x84, + (_, false) => 0x85, + }; emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex); } @@ -1361,16 +1377,25 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(*simm32); // And also here we use the "normal" G-E ordering. - let opcode = if *size == 1 { - 0x80 - } else if use_imm8 { - 0x83 + let opcode = if is_cmp { + if *size == 1 { + 0x80 + } else if use_imm8 { + 0x83 + } else { + 0x81 + } } else { - 0x81 + if *size == 1 { + 0xF6 + } else { + 0xF7 + } }; + let subopcode = if is_cmp { 7 } else { 0 }; let enc_g = int_reg_enc(*reg_g); - emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex); + emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32); } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e2afa80e2ad8..48e831b2d87c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2648,6 +2648,88 @@ fn test_x64_emit() { "cmpb %r13b, %r14b", )); + // ======================================================== + // TestRmiR + insns.push(( + Inst::test_rmi_r(8, RegMemImm::reg(r15), rdx), + "4C85FA", + "testq %r15, %rdx", + )); + insns.push(( + Inst::test_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "48855763", + "testq 99(%rdi), %rdx", + )); + insns.push(( + Inst::test_rmi_r(8, RegMemImm::imm(76543210), rdx), + "48F7C2EAF48F04", + "testq $76543210, %rdx", + )); + // + insns.push(( + Inst::test_rmi_r(4, RegMemImm::reg(r15), rdx), + "4485FA", + "testl %r15d, %edx", + )); + insns.push(( + Inst::test_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "855763", + "testl 99(%rdi), %edx", + )); + insns.push(( + Inst::test_rmi_r(4, RegMemImm::imm(76543210), rdx), + "F7C2EAF48F04", + "testl $76543210, %edx", + )); + // + insns.push(( + Inst::test_rmi_r(2, RegMemImm::reg(r15), rdx), + "664485FA", + "testw %r15w, %dx", + )); + insns.push(( + Inst::test_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "66855763", + "testw 99(%rdi), %dx", + )); + insns.push(( + Inst::test_rmi_r(2, RegMemImm::imm(23210), rdx), + "66F7C2AA5A", + "testw $23210, %dx", + )); + // + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(r15), rdx), + "4484FA", + "testb %r15b, %dl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "845763", + "testb 99(%rdi), %dl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::imm(70), rdx), + "F6C246", + "testb $70, %dl", + )); + // Extra byte-cases (paranoia!) for test_rmi_r for first operand = R + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rax), rbx), + "84C3", + "testb %al, %bl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rcx), rsi), + "4084CE", + "testb %cl, %sil", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rcx), r10), + "4184CA", + "testb %cl, %r10b", + )); + // ======================================================== // SetCC insns.push((Inst::setcc(CC::O, w_rsi), "400F90C6", "seto %sil")); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 806e8f276e59..0f0866c81321 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -168,9 +168,10 @@ pub enum Inst { dst: Writable, }, - /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg. + /// Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg. CmpRmiR { size: u8, // 1, 2, 4 or 8 + opcode: CmpOpcode, src: RegMemImm, dst: Reg, }, @@ -913,8 +914,30 @@ impl Inst { ) -> Inst { src.assert_regclass_is(RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); - debug_assert!(dst.get_class() == RegClass::I64); - Inst::CmpRmiR { size, src, dst } + debug_assert_eq!(dst.get_class(), RegClass::I64); + Inst::CmpRmiR { + size, + src, + dst, + opcode: CmpOpcode::Cmp, + } + } + + /// Does a comparison of dst & src for operands of size `size`. + pub(crate) fn test_rmi_r( + size: u8, // 1, 2, 4 or 8 + src: RegMemImm, + dst: Reg, + ) -> Inst { + src.assert_regclass_is(RegClass::I64); + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert_eq!(dst.get_class(), RegClass::I64); + Inst::CmpRmiR { + size, + src, + dst, + opcode: CmpOpcode::Test, + } } pub(crate) fn trap(trap_code: TrapCode) -> Inst { @@ -1597,12 +1620,23 @@ impl PrettyPrint for Inst { dst.to_reg().show_rru(mb_rru) ), - Inst::CmpRmiR { size, src, dst } => format!( - "{} {}, {}", - ljustify2("cmp".to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, *size), - show_ireg_sized(*dst, mb_rru, *size) - ), + Inst::CmpRmiR { + size, + src, + dst, + opcode, + } => { + let op = match opcode { + CmpOpcode::Cmp => "cmp", + CmpOpcode::Test => "test", + }; + format!( + "{} {}, {}", + ljustify2(op.to_string(), suffix_bwlq(*size)), + src.show_rru_sized(mb_rru, *size), + show_ireg_sized(*dst, mb_rru, *size) + ) + } Inst::Setcc { cc, dst } => format!( "{} {}", diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 68473a8afbf8..e89b8a24ffca 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -175,7 +175,7 @@ mod tests { _ => panic!("expected unwind information"), }; - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }"); + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (15, RememberState), (17, RestoreState)] }"); } fn create_multi_return_function(call_conv: CallConv) -> Function { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 6e6198c44b83..9299fce738b4 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -248,12 +248,6 @@ fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option }) } -fn input_to_sext_imm>(ctx: &mut C, spec: InsnInput) -> Option { - let input = ctx.get_input_as_source_or_const(spec.insn, spec.input); - let input_ty = ctx.input_ty(spec.insn, spec.input); - non_reg_input_to_sext_imm(input, input_ty) -} - fn input_to_imm>(ctx: &mut C, spec: InsnInput) -> Option { ctx.get_input_as_source_or_const(spec.insn, spec.input) .constant @@ -3731,10 +3725,25 @@ fn lower_insn_to_regs>( let cond_code = ctx.data(icmp).cond_code().unwrap(); CC::from_intcc(cond_code) } else { - // The input is a boolean value, compare it against zero. + let sel_ty = ctx.input_ty(insn, 0); let size = ctx.input_ty(insn, 0).bytes() as u8; let test = put_input_in_reg(ctx, flag_input); - ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test)); + let test_input = if sel_ty == types::B1 { + // The input is a boolean value; test the LSB for nonzero with: + // test reg, 1 + RegMemImm::imm(1) + } else { + // The input is an integer; test the whole value for + // nonzero with: + // test reg, reg + // + // (It doesn't make sense to have a boolean wider than + // one bit here -- which bit would cause us to select an + // input?) + assert!(!is_bool_ty(sel_ty)); + RegMemImm::reg(test) + }; + ctx.emit(Inst::test_rmi_r(size, test_input, test)); CC::NZ }; @@ -4355,7 +4364,18 @@ impl LowerBackend for X64Backend { _ => unreachable!(), }; let size_bytes = src_ty.bytes() as u8; - ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src)); + // See case for `Opcode::Select` above re: testing the + // boolean input. + let test_input = if src_ty == types::B1 { + // test src, 1 + RegMemImm::imm(1) + } else { + assert!(!is_bool_ty(src_ty)); + // test src, src + RegMemImm::reg(src) + }; + + ctx.emit(Inst::test_rmi_r(size_bytes, test_input, src)); ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } else { unimplemented!("brz/brnz with non-int type {:?}", src_ty); diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif new file mode 100644 index 000000000000..7b65fa4e55de --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -0,0 +1,73 @@ +test compile +target x86_64 +feature "experimental_x64" + +function %f0(b1, i32, i32) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1, v1: i32, v2: i32): + v3 = select.i32 v0, v1, v2 +; nextln: testb $$1, %dil +; nextln: cmovnzl %esi, %edx + + return v3 +; nextln: movq %rdx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} + +function %f1(b1) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1): + brnz v0, block1 + jump block2 +; nextln: testb $$1, %dil +; nextln: jnz label1; j label2 + +block1: + v1 = iconst.i32 1 + return v1 +; check: movl $$1, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +block2: + v2 = iconst.i32 2 + return v2 +; check: movl $$2, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} + +function %f2(b1) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1): + brz v0, block1 + jump block2 +; nextln: testb $$1, %dil +; nextln: jz label1; j label2 + +block1: + v1 = iconst.i32 1 + return v1 +; check: movl $$1, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +block2: + v2 = iconst.i32 2 + return v2 +; check: movl $$2, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +}