diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs b/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs
index 6883e0768..ad29bad61 100644
--- a/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs
+++ b/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs
@@ -30,7 +30,7 @@
 use super::{rev_analyse::RevAnalyse, Register, VarLocation};
 use crate::compile::jitc_yk::{
     codegen::abs_stack::AbstractStack,
-    jit_ir::{Const, ConstIdx, FloatTy, Inst, InstIdx, Module, Operand, PtrAddInst, Ty},
+    jit_ir::{Const, ConstIdx, FloatTy, GuardInst, Inst, InstIdx, Module, Operand, PtrAddInst, Ty},
 };
 use dynasmrt::{
     dynasm,
@@ -270,6 +270,38 @@ impl<'a> LSRegAlloc<'a> {
             self.assign_fp_regs(asm, iidx, fp_constraints),
         )
     }
+
+    /// Return a [GuardSnapshot] for the guard `ginst`. This should be called when generating code
+    /// for a guard: it returns the information the register allocator will later need to perform
+    /// its part in generating correct code for this guard's failure in
+    /// [Self::get_ready_for_deopt].
+    pub(super) fn guard_snapshot(&self, ginst: &GuardInst) -> GuardSnapshot {
+        let gi = ginst.guard_info(self.m);
+        let mut regs_to_zero_ext = Vec::new();
+        for op in gi.live_vars().iter().map(|(_, pop)| pop.unpack(self.m)) {
+            if let Operand::Var(op_iidx) = op {
+                if let Some(reg) = self.find_op_in_gp_reg(&op) {
+                    let RegState::FromInst(_, ext) = self.gp_reg_states[usize::from(reg.code())]
+                    else {
+                        panic!()
+                    };
+                    if ext != RegExtension::ZeroExtended {
+                        regs_to_zero_ext.push((reg, self.m.inst(op_iidx).def_bitw(self.m)));
+                    }
+                }
+            }
+        }
+        GuardSnapshot { regs_to_zero_ext }
+    }
+
+    /// When generating the code for a guard failure, do the necessary work from the register
+    /// allocator's perspective (e.g. ensuring registers have an appropriate [RegExtension]) for
+    /// deopt to occur.
+    pub(super) fn get_ready_for_deopt(&self, asm: &mut Assembler, gsnap: &GuardSnapshot) {
+        for (reg, bitw) in &gsnap.regs_to_zero_ext {
+            self.force_zero_extend_to_reg64(asm, *reg, *bitw);
+        }
+    }
 }
 
 /// The parts of the register allocator needed for general purpose registers.
@@ -813,19 +845,19 @@ impl LSRegAlloc<'_> {
         }
     }
 
-    /// Zero extend the `from_bits`-sized integer stored in `reg` up to the full size of the 64-bit
+    /// Zero extend the `from_bitw`-sized integer stored in `reg` up to the full size of the 64-bit
     /// register.
     ///
-    /// `from_bits` must be between 1 and 64.
-    fn force_zero_extend_to_reg64(&self, asm: &mut Assembler, reg: Rq, from_bits: u32) {
-        debug_assert!(from_bits > 0 && from_bits <= 64);
-        match from_bits {
-            1..=31 => dynasm!(asm; and Rd(reg.code()), ((1u64 << from_bits) - 1) as i32),
+    /// `from_bitw` must be between 1 and 64.
+    fn force_zero_extend_to_reg64(&self, asm: &mut Assembler, reg: Rq, from_bitw: u32) {
+        debug_assert!(from_bitw > 0 && from_bitw <= 64);
+        match from_bitw {
+            1..=31 => dynasm!(asm; and Rd(reg.code()), ((1u64 << from_bitw) - 1) as i32),
             32 => {
                 // mov into a 32-bit register zero extends the upper 32 bits.
                 dynasm!(asm; mov Rd(reg.code()), Rd(reg.code()));
             }
-            64 => (), // nothing to do.
+            64 => (), // There are no additional bits to zero extend.
             x => todo!("{x}"),
         }
     }
@@ -1700,6 +1732,14 @@ impl RegConstraint {
     }
 }
 
+/// The information the register allocator records at the point of a guard's code generation that
+/// it later needs to get a failing guard ready for deopt.
+pub(super) struct GuardSnapshot {
+    /// The registers we need to zero extend: the `u32` is the `from_bitw` that is passed to
+    /// `force_zero_extend_to_reg64`.
+    regs_to_zero_ext: Vec<(Rq, u32)>,
+}
+
 #[derive(Clone, Debug)]
 enum RegState {
     Reserved,
diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
index f18eb3eed..957331fff 100644
--- a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
+++ b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
@@ -60,7 +60,7 @@ pub(super) mod lsregalloc;
 mod rev_analyse;
 
 use deopt::{__yk_deopt, __yk_guardcheck};
-use lsregalloc::{GPConstraint, LSRegAlloc, RegConstraint, RegExtension};
+use lsregalloc::{GPConstraint, GuardSnapshot, LSRegAlloc, RegConstraint, RegExtension};
 
 /// General purpose argument registers as defined by the x64 SysV ABI.
 static ARG_GP_REGS: [Rq; 6] = [Rq::RDI, Rq::RSI, Rq::RDX, Rq::RCX, Rq::R8, Rq::R9];
@@ -301,7 +301,7 @@ struct Assemble<'a> {
     body_start_locs: Vec<AssemblyOffset>,
     asm: dynasmrt::x64::Assembler,
     /// Deopt info, with one entry per guard, in the order that the guards appear in the trace.
-    deoptinfo: HashMap<u64, DeoptInfo>,
+    deoptinfo: HashMap<u64, (GuardSnapshot, DeoptInfo)>,
     ///
     /// Maps assembly offsets to comments.
     ///
@@ -419,7 +419,7 @@ impl<'a> Assemble<'a> {
         let mut infos = self
            .deoptinfo
            .iter()
-            .map(|(id, l)| (*id, l.fail_label))
+            .map(|(id, (_regset, di))| (*id, di.fail_label))
            .collect::<Vec<_>>();
         // Debugging deopt asm is much easier if the stubs are in order.
         #[cfg(debug_assertions)]
@@ -431,6 +431,8 @@ impl<'a> Assemble<'a> {
                self.asm.offset(),
                format!("Deopt ID for guard {:?}", deoptid),
            );
+            self.ra
+                .get_ready_for_deopt(&mut self.asm, &self.deoptinfo[&deoptid].0);
            // FIXME: Why are `deoptid`s 64 bit? We're not going to have that many guards!
            let deoptid = i32::try_from(deoptid).unwrap();
            dynasm!(self.asm
@@ -560,7 +562,11 @@ impl<'a> Assemble<'a> {
         Ok(Arc::new(X64CompiledTrace {
             mt,
             buf,
-            deoptinfo: self.deoptinfo,
+            deoptinfo: self
+                .deoptinfo
+                .into_iter()
+                .map(|(id, (_gsnap, di))| (id, di))
+                .collect::<HashMap<_, _>>(),
             prevguards,
             sp_offset: self.ra.stack_size(),
             prologue_offset: self.prologue_offset.0,
@@ -2712,7 +2718,8 @@ impl<'a> Assemble<'a> {
             inlined_frames: gi.inlined_frames().to_vec(),
             guard: Guard::new(),
         };
-        self.deoptinfo.insert(inst.gidx.into(), deoptinfo);
+        self.deoptinfo
+            .insert(inst.gidx.into(), (self.ra.guard_snapshot(inst), deoptinfo));
         fail_label
     }
 
@@ -4350,6 +4357,30 @@ mod tests {
         );
     }
 
+    #[test]
+    fn cg_deopt_reg_exts() {
+        codegen_and_test(
+            "
+              entry:
+                %0: i8 = param 0
+                %1: i1 = param 1
+                %2: i8 = shl %0, 1i8
+                guard true, %1, [%2]
+            ",
+            "
+                ...
+                ; %2: i8 = shl %0, 1i8
+                shl r.64.x, 0x01
+                ; guard true, %1, ...
+                ...
+                ; deopt id for guard 0
+                and r.32.x, 0xff
+                ...
+            ",
+            false,
+        );
+    }
+
     #[test]
     fn cg_icmp_const() {
         codegen_and_test(