Skip to content

Commit

Permalink
Merge pull request #1595 from ltratt/zero_extend_for_deopt
Browse files Browse the repository at this point in the history
Appropriately zero extend registers when a guard fails.
  • Loading branch information
vext01 authored Feb 10, 2025
2 parents 16f221b + e772f0c commit c3537dd
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 12 deletions.
54 changes: 47 additions & 7 deletions ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
use super::{rev_analyse::RevAnalyse, Register, VarLocation};
use crate::compile::jitc_yk::{
codegen::abs_stack::AbstractStack,
jit_ir::{Const, ConstIdx, FloatTy, Inst, InstIdx, Module, Operand, PtrAddInst, Ty},
jit_ir::{Const, ConstIdx, FloatTy, GuardInst, Inst, InstIdx, Module, Operand, PtrAddInst, Ty},
};
use dynasmrt::{
dynasm,
Expand Down Expand Up @@ -270,6 +270,38 @@ impl<'a> LSRegAlloc<'a> {
self.assign_fp_regs(asm, iidx, fp_constraints),
)
}

/// Return a [GuardSnapshot] for the guard `ginst`. This should be called when generating code
/// for a guard: it returns the information the register allocator will later need to perform
/// its part in generating correct code for this guard's failure in
/// [Self::get_ready_for_deopt].
pub(super) fn guard_snapshot(&self, ginst: &GuardInst) -> GuardSnapshot {
let gi = ginst.guard_info(self.m);
let mut regs_to_zero_ext = Vec::new();
for op in gi.live_vars().iter().map(|(_, pop)| pop.unpack(self.m)) {
if let Operand::Var(op_iidx) = op {
if let Some(reg) = self.find_op_in_gp_reg(&op) {
let RegState::FromInst(_, ext) = self.gp_reg_states[usize::from(reg.code())]
else {
panic!()
};
if ext != RegExtension::ZeroExtended {
regs_to_zero_ext.push((reg, self.m.inst(op_iidx).def_bitw(self.m)));
}
}
}
}
GuardSnapshot { regs_to_zero_ext }
}

/// When generating the code for a guard failure, do the necessary work from the register
/// allocator's perspective (e.g. ensuring registers have an appropriate [RegExtension]) for
/// deopt to occur.
pub(super) fn get_ready_for_deopt(&self, asm: &mut Assembler, gsnap: &GuardSnapshot) {
for (reg, bitw) in &gsnap.regs_to_zero_ext {
self.force_zero_extend_to_reg64(asm, *reg, *bitw);
}
}
}

/// The parts of the register allocator needed for general purpose registers.
Expand Down Expand Up @@ -813,19 +845,19 @@ impl LSRegAlloc<'_> {
}
}

/// Zero extend the `from_bits`-sized integer stored in `reg` up to the full size of the 64-bit
/// Zero extend the `from_bitw`-sized integer stored in `reg` up to the full size of the 64-bit
/// register.
///
/// `from_bits` must be between 1 and 64.
fn force_zero_extend_to_reg64(&self, asm: &mut Assembler, reg: Rq, from_bits: u32) {
debug_assert!(from_bits > 0 && from_bits <= 64);
match from_bits {
1..=31 => dynasm!(asm; and Rd(reg.code()), ((1u64 << from_bits) - 1) as i32),
fn force_zero_extend_to_reg64(&self, asm: &mut Assembler, reg: Rq, from_bitw: u32) {
debug_assert!(from_bitw > 0 && from_bitw <= 64);
match from_bitw {
1..=31 => dynasm!(asm; and Rd(reg.code()), ((1u64 << from_bitw) - 1) as i32),
32 => {
// mov into a 32-bit register zero extends the upper 32 bits.
dynasm!(asm; mov Rd(reg.code()), Rd(reg.code()));
}
64 => (), // nothing to do.
64 => (), // There are no additional bits to zero extend
x => todo!("{x}"),
}
}
Expand Down Expand Up @@ -1700,6 +1732,14 @@ impl<R: dynasmrt::Register> RegConstraint<R> {
}
}

/// The information the register allocator records at the point of a guard's code generation that
/// it later needs to get a failing guard ready for deopt.
pub(super) struct GuardSnapshot {
/// The registers we need to zero extend: the `u32` is the `from_bitw` that is passed to
/// `force_zero_extend_to_reg64`.
regs_to_zero_ext: Vec<(Rq, u32)>,
}

#[derive(Clone, Debug)]
enum RegState {
Reserved,
Expand Down
41 changes: 36 additions & 5 deletions ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub(super) mod lsregalloc;
mod rev_analyse;

use deopt::{__yk_deopt, __yk_guardcheck};
use lsregalloc::{GPConstraint, LSRegAlloc, RegConstraint, RegExtension};
use lsregalloc::{GPConstraint, GuardSnapshot, LSRegAlloc, RegConstraint, RegExtension};

/// General purpose argument registers as defined by the x64 SysV ABI.
static ARG_GP_REGS: [Rq; 6] = [Rq::RDI, Rq::RSI, Rq::RDX, Rq::RCX, Rq::R8, Rq::R9];
Expand Down Expand Up @@ -301,7 +301,7 @@ struct Assemble<'a> {
body_start_locs: Vec<VarLocation>,
asm: dynasmrt::x64::Assembler,
/// Deopt info, with one entry per guard, in the order that the guards appear in the trace.
deoptinfo: HashMap<usize, DeoptInfo>,
deoptinfo: HashMap<usize, (GuardSnapshot, DeoptInfo)>,
///
/// Maps assembly offsets to comments.
///
Expand Down Expand Up @@ -419,7 +419,7 @@ impl<'a> Assemble<'a> {
let mut infos = self
.deoptinfo
.iter()
.map(|(id, l)| (*id, l.fail_label))
.map(|(id, (_regset, di))| (*id, di.fail_label))
.collect::<Vec<_>>();
// Debugging deopt asm is much easier if the stubs are in order.
#[cfg(debug_assertions)]
Expand All @@ -431,6 +431,8 @@ impl<'a> Assemble<'a> {
self.asm.offset(),
format!("Deopt ID for guard {:?}", deoptid),
);
self.ra
.get_ready_for_deopt(&mut self.asm, &self.deoptinfo[&deoptid].0);
// FIXME: Why are `deoptid`s 64 bit? We're not going to have that many guards!
let deoptid = i32::try_from(deoptid).unwrap();
dynasm!(self.asm
Expand Down Expand Up @@ -560,7 +562,11 @@ impl<'a> Assemble<'a> {
Ok(Arc::new(X64CompiledTrace {
mt,
buf,
deoptinfo: self.deoptinfo,
deoptinfo: self
.deoptinfo
.into_iter()
.map(|(id, (_gsnap, di))| (id, di))
.collect::<HashMap<_, _>>(),
prevguards,
sp_offset: self.ra.stack_size(),
prologue_offset: self.prologue_offset.0,
Expand Down Expand Up @@ -2712,7 +2718,8 @@ impl<'a> Assemble<'a> {
inlined_frames: gi.inlined_frames().to_vec(),
guard: Guard::new(),
};
self.deoptinfo.insert(inst.gidx.into(), deoptinfo);
self.deoptinfo
.insert(inst.gidx.into(), (self.ra.guard_snapshot(inst), deoptinfo));
fail_label
}

Expand Down Expand Up @@ -4350,6 +4357,30 @@ mod tests {
);
}

#[test]
fn cg_deopt_reg_exts() {
codegen_and_test(
"
entry:
%0: i8 = param 0
%1: i1 = param 1
%2: i8 = shl %0, 1i8
guard true, %1, [%2]
",
"
...
; %2: i8 = shl %0, 1i8
shl r.64.x, 0x01
; guard true, %1, ...
...
; deopt id for guard 0
and r.32.x, 0xff
...
",
false,
);
}

#[test]
fn cg_icmp_const() {
codegen_and_test(
Expand Down

0 comments on commit c3537dd

Please sign in to comment.