diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index b17e2b111b65..e6f082d70a8d 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -65,7 +65,7 @@ pub use crate::ir::stackslot::{ }; pub use crate::ir::trapcode::TrapCode; pub use crate::ir::types::Type; -pub use crate::ir::user_stack_maps::UserStackMapEntry; +pub use crate::ir::user_stack_maps::{UserStackMap, UserStackMapEntry}; use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; diff --git a/cranelift/codegen/src/ir/user_stack_maps.rs b/cranelift/codegen/src/ir/user_stack_maps.rs index 4f9daf910bf9..48ae47c82bf8 100644 --- a/cranelift/codegen/src/ir/user_stack_maps.rs +++ b/cranelift/codegen/src/ir/user_stack_maps.rs @@ -29,13 +29,19 @@ //! contrast to the old system and its `r64` values). use crate::ir; +use cranelift_bitset::CompoundBitSet; +use cranelift_entity::PrimaryMap; use smallvec::SmallVec; pub(crate) type UserStackMapEntryVec = SmallVec<[UserStackMapEntry; 4]>; -/// A stack map entry describes a GC-managed value and its location at a -/// particular instruction. -#[derive(Clone, PartialEq, Hash)] +/// A stack map entry describes a single GC-managed value and its location on +/// the stack. +/// +/// A stack map entry is associated with a particular instruction, and that +/// instruction must be a safepoint. The GC-managed value must be stored in the +/// described location across this entry's instruction. +#[derive(Clone, Debug, PartialEq, Hash)] #[cfg_attr( feature = "enable-serde", derive(serde_derive::Serialize, serde_derive::Deserialize) @@ -50,3 +56,46 @@ pub struct UserStackMapEntry { /// The offset within the stack slot where this entry's value can be found. pub offset: u32, } + +/// A compiled stack map, describing the location of many GC-managed values. +/// +/// A stack map is associated with a particular instruction, and that +/// instruction is a safepoint. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Deserialize, serde_derive::Serialize) +)] +pub struct UserStackMap { + by_type: SmallVec<[(ir::Type, CompoundBitSet); 1]>, +} + +impl UserStackMap { + /// Coalesce the given entries into a new `UserStackMap`. + pub fn new( + entries: &[UserStackMapEntry], + stack_slot_offsets: &PrimaryMap, + ) -> Self { + let mut by_type = SmallVec::<[(ir::Type, CompoundBitSet); 1]>::default(); + + for entry in entries { + let offset = stack_slot_offsets[entry.slot] + entry.offset; + let offset = usize::try_from(offset).unwrap(); + + // Don't bother trying to avoid an `O(n)` search here: `n` is + // basically always one in practice; even if it isn't, there aren't + // that many different CLIF types. + let index = by_type + .iter() + .position(|(ty, _)| *ty == entry.ty) + .unwrap_or_else(|| { + by_type.push((entry.ty, CompoundBitSet::with_capacity(offset + 1))); + by_type.len() - 1 + }); + + by_type[index].1.insert(offset); + } + + UserStackMap { by_type } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 42afe51d455f..c1f91b07a432 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -651,11 +651,18 @@ fn enc_asimd_mod_imm(rd: Writable, q_op: u32, cmode: u32, imm: u8) -> u32 { /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + frame_layout: FrameLayout, } @@ -663,13 +670,19 @@ impl MachInstEmitState for EmitState { fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -686,8 +699,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } fn clear_post_insn(&mut self) { @@ -2921,11 +2934,16 @@ impl MachInstEmit for Inst { } } &Inst::Call { ref info } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } sink.add_reloc(Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); } @@ -2939,11 +2957,16 @@ impl MachInstEmit for Inst { } } &Inst::CallInd { ref info } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } let rn = info.rn; sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); } diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 90186fb9fbfb..fb63c355fc29 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -46,20 +46,28 @@ pub enum EmitVState { /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + /// Vector State /// Controls the current state of the vector unit at the emission point. vstate: EmitVState, + frame_layout: FrameLayout, } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } } @@ -70,14 +78,20 @@ impl MachInstEmitState for EmitState { ) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, vstate: EmitVState::Unknown, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -1134,13 +1148,21 @@ impl Inst { sink.add_call_site(info.opcode); } sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); - if let Some(s) = state.take_stack_map() { + + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); } + Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0) .into_iter() .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); if callee_pop_size > 0 { for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { @@ -1158,9 +1180,14 @@ impl Inst { } .emit(sink, emit_info, state); - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } if info.opcode.is_call() { sink.add_call_site(info.opcode); diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index b98e064ad225..5c72b887e9e7 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -1,7 +1,7 @@ //! S390x ISA: binary code emission. use crate::binemit::StackMap; -use crate::ir::{MemFlags, TrapCode}; +use crate::ir::{self, MemFlags, TrapCode}; use crate::isa::s390x::inst::*; use crate::isa::s390x::settings as s390x_settings; use cranelift_control::ControlPlane; @@ -1306,11 +1306,19 @@ fn put_with_trap(sink: &mut MachBuffer, enc: &[u8], trap_code: TrapCode) { #[derive(Default, Clone, Debug)] pub struct EmitState { pub(crate) initial_sp_offset: i64, - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + frame_layout: FrameLayout, } @@ -1319,13 +1327,19 @@ impl MachInstEmitState for EmitState { EmitState { initial_sp_offset: abi.frame_size() as i64, stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -1342,8 +1356,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } fn clear_post_insn(&mut self) { @@ -3243,9 +3257,15 @@ impl Inst { _ => unreachable!(), } - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(6), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 6; + sink.push_user_stack_map(state, offset, s); + } + put(sink, &enc_ril_b(opcode, link.to_reg(), 0)); if info.opcode.is_call() { sink.add_call_site(info.opcode); @@ -3255,10 +3275,16 @@ impl Inst { debug_assert_eq!(link.to_reg(), gpr(14)); let rn = info.rn; - let opcode = 0x0d; // BASR - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 2; + sink.push_user_stack_map(state, offset, s); + } + + let opcode = 0x0d; // BASR put(sink, &enc_rr(opcode, link.to_reg(), rn)); if info.opcode.is_call() { sink.add_call_site(info.opcode); diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index abfcc447770c..3e0d44059c83 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1599,9 +1599,15 @@ pub(crate) fn emit( opcode, info: call_info, } => { - if let Some(s) = state.take_stack_map() { + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset() + 5; + sink.push_user_stack_map(state, offset, s); + } + sink.put1(0xE8); // The addend adjusts for the difference between the end of the instruction and the // beginning of the immediate field. @@ -1696,9 +1702,16 @@ pub(crate) fn emit( ); } } - if let Some(s) = state.take_stack_map() { + + let (stack_map, user_stack_map) = state.take_stack_map(); + if let Some(s) = stack_map { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + if opcode.is_call() { sink.add_call_site(*opcode); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_state.rs b/cranelift/codegen/src/isa/x64/inst/emit_state.rs index 21f01dcc01b7..9cca7d7895d8 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_state.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_state.rs @@ -1,11 +1,18 @@ use super::*; +use crate::ir; use cranelift_control::ControlPlane; /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + /// Safepoint stack map for upcoming instruction, as provided to + /// `pre_safepoint()`. stack_map: Option, + + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, @@ -19,13 +26,19 @@ impl MachInstEmitState for EmitState { fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self { EmitState { stack_map: None, + user_stack_map: None, ctrl_plane, frame_layout: abi.frame_layout().clone(), } } - fn pre_safepoint(&mut self, stack_map: StackMap) { - self.stack_map = Some(stack_map); + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ) { + self.stack_map = stack_map; + self.user_stack_map = user_stack_map; } fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { @@ -42,8 +55,8 @@ impl MachInstEmitState for EmitState { } impl EmitState { - pub(crate) fn take_stack_map(&mut self) -> Option { - self.stack_map.take() + pub(crate) fn take_stack_map(&mut self) -> (Option, Option) { + (self.stack_map.take(), self.user_stack_map.take()) } pub(crate) fn clear_post_insn(&mut self) { diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 713fcec2540e..fd9555b7c499 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -59,8 +59,10 @@ //! //! ```plain //! (high address) -//! -//! +---------------------------+ +//! | ... | +//! | caller frames | +//! | ... | +//! +===========================+ //! | ... | //! | stack args | //! Canonical Frame Address --> | (accessed via FP) | @@ -68,24 +70,24 @@ //! SP at function entry -----> | return address | //! +---------------------------+ //! FP after prologue --------> | FP (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | clobbered callee-saves | -//! unwind-frame base --------> | (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | spill slots | -//! | (accessed via SP) | -//! | ... | -//! | stack slots | -//! | (accessed via SP) | -//! | (alloc'd by prologue) | -//! +---------------------------+ -//! | [alignment as needed] | -//! | ... | -//! | args for largest call | -//! SP -----------------------> | (alloc'd by prologue) | -//! +---------------------------+ +//! +---------------------------+ ----- +//! | ... | | +//! | clobbered callee-saves | | +//! unwind-frame base --------> | (pushed by prologue) | | +//! +---------------------------+ | +//! | ... | | +//! | spill slots | | +//! | (accessed via SP) | active +//! | ... | size +//! | stack slots | | +//! | (accessed via SP) | | +//! | (alloc'd by prologue) | | +//! +---------------------------+ | +//! | [alignment as needed] | | +//! | ... | | +//! | args for largest call | | +//! SP -----------------------> | (alloc'd by prologue) | | +//! +===========================+ ----- //! //! (low address) //! ``` @@ -1012,6 +1014,12 @@ impl FrameLayout { debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float)); (ints, floats) } + + /// The size of FP to SP while the frame is active (not during prologue + /// setup or epilogue tear down). + pub fn active_size(&self) -> u32 { + self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size + } } /// ABI object for a function body. diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index c4ffbb2f8976..c02b8596d05d 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -178,6 +178,7 @@ use crate::machinst::{ BlockIndex, MachInstLabelUse, TextSectionBuilder, VCodeConstant, VCodeConstants, VCodeInst, }; use crate::trace; +use crate::{ir, MachInstEmitState}; use crate::{timing, VCodeConstantData}; use cranelift_control::ControlPlane; use cranelift_entity::{entity_impl, PrimaryMap}; @@ -250,6 +251,11 @@ pub struct MachBuffer { srclocs: SmallVec<[MachSrcLoc; 64]>, /// Any stack maps referring to this code. stack_maps: SmallVec<[MachStackMap; 8]>, + /// Any user stack maps for this code. + /// + /// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted + /// by code offset, and each stack map covers `span` bytes on the stack. + user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>, /// Any unwind info at a given location. unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>, /// The current source location in progress (after `start_srcloc()` and @@ -329,6 +335,7 @@ impl MachBufferFinalized { .map(|srcloc| srcloc.apply_base_srcloc(base_srcloc)) .collect(), stack_maps: self.stack_maps, + user_stack_maps: self.user_stack_maps, unwind_info: self.unwind_info, alignment: self.alignment, } @@ -357,9 +364,14 @@ pub struct MachBufferFinalized { pub(crate) srclocs: SmallVec<[T::MachSrcLocType; 64]>, /// Any stack maps referring to this code. pub(crate) stack_maps: SmallVec<[MachStackMap; 8]>, + /// Any user stack maps for this code. + /// + /// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted + /// by code offset, and each stack map covers `span` bytes on the stack. + pub(crate) user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>, /// Any unwind info at a given location. pub unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>, - /// The requireed alignment of this buffer + /// The required alignment of this buffer. pub alignment: u32, } @@ -447,6 +459,7 @@ impl MachBuffer { call_sites: SmallVec::new(), srclocs: SmallVec::new(), stack_maps: SmallVec::new(), + user_stack_maps: SmallVec::new(), unwind_info: SmallVec::new(), cur_srcloc: None, label_offsets: SmallVec::new(), @@ -1532,6 +1545,7 @@ impl MachBuffer { call_sites: self.call_sites, srclocs, stack_maps: self.stack_maps, + user_stack_maps: self.user_stack_maps, unwind_info: self.unwind_info, alignment, } @@ -1667,6 +1681,36 @@ impl MachBuffer { stack_map, }); } + + /// Push a user stack map onto this buffer. + /// + /// The stack map is associated with the given `return_addr` code + /// offset. This must be the PC for the instruction just *after* this stack + /// map's associated instruction. For example in the sequence `call $foo; + /// add r8, rax`, the `return_addr` must be the offset of the start of the + /// `add` instruction. + /// + /// Stack maps must be pushed in sorted `return_addr` order. + pub fn push_user_stack_map( + &mut self, + emit_state: &I::State, + return_addr: CodeOffset, + stack_map: ir::UserStackMap, + ) { + let span = emit_state.frame_layout().active_size(); + trace!("Adding user stack map @ {return_addr:#x} spanning {span} bytes: {stack_map:?}"); + + debug_assert!( + self.user_stack_maps + .last() + .map_or(true, |(prev_addr, _, _)| *prev_addr < return_addr), + "pushed stack maps out of order: {} is not less than {}", + self.user_stack_maps.last().unwrap().0, + return_addr, + ); + + self.user_stack_maps.push((return_addr, span, stack_map)); + } } impl MachBufferFinalized { diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index ea2be76d485b..32db947a259e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -14,9 +14,9 @@ use crate::ir::{ Value, ValueDef, ValueLabelAssignments, ValueLabelStart, }; use crate::machinst::{ - writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, InsnIndex, LoweredBlock, - MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, - VCodeInst, ValueRegs, Writable, + writable_value_regs, BackwardsInsnIndex, BlockIndex, BlockLoweringOrder, Callee, InsnIndex, + LoweredBlock, MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, + VCodeConstants, VCodeInst, ValueRegs, Writable, }; use crate::settings::Flags; use crate::{trace, CodegenResult}; @@ -485,8 +485,8 @@ impl<'func, I: VCodeInst> Lower<'func, I> { /// Pre-analysis: compute `value_ir_uses`. See comment on /// `ValueUseState` for a description of what this analysis /// computes. - fn compute_use_states<'a>( - f: &'a Function, + fn compute_use_states( + f: &Function, sret_param: Option, ) -> SecondaryMap { // We perform the analysis without recursion, so we don't @@ -803,9 +803,39 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } } + let start = self.vcode.vcode.num_insts(); let loc = self.srcloc(inst); self.finish_ir_inst(loc); + // If the instruction had a user stack map, forward it from the CLIF + // to the vcode. + if let Some(entries) = self.f.dfg.user_stack_map_entries(inst) { + let end = self.vcode.vcode.num_insts(); + debug_assert!(end > start); + debug_assert_eq!( + (start..end) + .filter(|i| self.vcode.vcode[InsnIndex::new(*i)].is_safepoint()) + .count(), + 1 + ); + for i in start..end { + let iix = InsnIndex::new(i); + if self.vcode.vcode[iix].is_safepoint() { + trace!( + "Adding user stack map from clif\n\n\ + {inst:?} `{}`\n\n\ + to vcode\n\n\ + {iix:?} `{}`", + self.f.dfg.display_inst(inst), + &self.vcode.vcode[iix].pretty_print_inst(&mut Default::default()), + ); + self.vcode + .add_user_stack_map(BackwardsInsnIndex::new(iix.index()), entries); + break; + } + } + } + // maybe insert random instruction if ctrl_plane.get_decision() { if ctrl_plane.get_decision() { diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index bf15ec462dae..af87c3053a6b 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -45,8 +45,9 @@ //! ``` use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; -use crate::ir::function::FunctionParameters; -use crate::ir::{DynamicStackSlot, RelSourceLoc, StackSlot, Type}; +use crate::ir::{ + self, function::FunctionParameters, DynamicStackSlot, RelSourceLoc, StackSlot, Type, +}; use crate::isa::FunctionAlignment; use crate::result::CodegenResult; use crate::settings; @@ -284,10 +285,13 @@ pub enum MachTerminator { pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. type State: MachInstEmitState; + /// Constant information used in `emit` invocations. type Info; + /// Emit the instruction. fn emit(&self, code: &mut MachBuffer, info: &Self::Info, state: &mut Self::State); + /// Pretty-print the instruction. fn pretty_print_inst(&self, state: &mut Self::State) -> String; } @@ -297,20 +301,29 @@ pub trait MachInstEmit: MachInst { pub trait MachInstEmitState: Default + Clone + Debug { /// Create a new emission state given the ABI object. fn new(abi: &Callee, ctrl_plane: ControlPlane) -> Self; + /// Update the emission state before emitting an instruction that is a /// safepoint. - fn pre_safepoint(&mut self, _stack_map: StackMap) {} + fn pre_safepoint( + &mut self, + stack_map: Option, + user_stack_map: Option, + ); + /// The emission state holds ownership of a control plane, so it doesn't /// have to be passed around explicitly too much. `ctrl_plane_mut` may /// be used if temporary access to the control plane is needed by some /// other function that doesn't have access to the emission state. fn ctrl_plane_mut(&mut self) -> &mut ControlPlane; + /// Used to continue using a control plane after the emission state is /// not needed anymore. fn take_ctrl_plane(self) -> ControlPlane; + /// A hook that triggers when first emitting a new block. /// It is guaranteed to be called before any instructions are emitted. fn on_new_block(&mut self) {} + /// The [`FrameLayout`] for the function currently being compiled. fn frame_layout(&self) -> &FrameLayout; } diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 7a971be74c84..48a45c90eb8f 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -40,6 +40,33 @@ use std::fmt; /// Index referring to an instruction in VCode. pub type InsnIndex = regalloc2::Inst; +/// Extension trait for `InsnIndex` to allow conversion to a +/// `BackwardsInsnIndex`. +trait ToBackwardsInsnIndex { + fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex; +} + +impl ToBackwardsInsnIndex for InsnIndex { + fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex { + BackwardsInsnIndex::new(num_insts - self.index() - 1) + } +} + +/// An index referring to an instruction in the VCode when it is backwards, +/// during VCode construction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr( + feature = "enable-serde", + derive(::serde::Serialize, ::serde::Deserialize) +)] +pub struct BackwardsInsnIndex(InsnIndex); + +impl BackwardsInsnIndex { + pub fn new(i: usize) -> Self { + BackwardsInsnIndex(InsnIndex::new(i)) + } +} + /// Index referring to a basic block in VCode. pub type BlockIndex = regalloc2::Block; @@ -67,6 +94,14 @@ pub struct VCode { /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, + /// A map from backwards instruction index to the user stack map for that + /// instruction. + /// + /// This is a sparse side table that only has entries for instructions that + /// are safepoints, and only for a subset of those that have an associated + /// user stack map. + user_stack_maps: FxHashMap, + /// Operands: pre-regalloc references to virtual registers with /// constraints, in one flattened array. This allows the regalloc /// to efficiently access all operands without requiring expensive @@ -251,7 +286,7 @@ impl VCodeBuilder { block_order: BlockLoweringOrder, constants: VCodeConstants, direction: VCodeBuildDirection, - ) -> VCodeBuilder { + ) -> Self { let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); VCodeBuilder { @@ -565,6 +600,17 @@ impl VCodeBuilder { self.vcode } + + /// Add a user stack map for the associated instruction. + pub fn add_user_stack_map( + &mut self, + inst: BackwardsInsnIndex, + entries: &[ir::UserStackMapEntry], + ) { + let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets()); + let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map); + debug_assert!(old_entry.is_none()); + } } /// Is this type a reference type? @@ -582,12 +628,13 @@ impl VCode { emit_info: I::Info, block_order: BlockLoweringOrder, constants: VCodeConstants, - ) -> VCode { + ) -> Self { let n_blocks = block_order.lowered_order().len(); VCode { sigs, vreg_types: vec![], insts: Vec::with_capacity(10 * n_blocks), + user_stack_maps: FxHashMap::default(), operands: Vec::with_capacity(30 * n_blocks), operand_ranges: Ranges::with_capacity(10 * n_blocks), clobbers: FxHashMap::default(), @@ -864,7 +911,7 @@ impl VCode { // If this is a safepoint, compute a stack map // and pass it to the emit state. - if self.insts[iix.index()].is_safepoint() { + let stack_map_disasm = if self.insts[iix.index()].is_safepoint() { let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; // Find the contiguous range of // (progpoint, allocation) safepoint slot @@ -888,13 +935,36 @@ impl VCode { let slot = alloc.as_stack().unwrap(); safepoint_slots.push(slot); } - if !safepoint_slots.is_empty() { - let stack_map = self - .abi - .spillslots_to_stack_map(&safepoint_slots[..], &state); - state.pre_safepoint(stack_map); - } - } + + let stack_map = if safepoint_slots.is_empty() { + None + } else { + Some( + self.abi + .spillslots_to_stack_map(&safepoint_slots[..], &state), + ) + }; + + let (user_stack_map, user_stack_map_disasm) = { + // The `user_stack_maps` is keyed by reverse + // instruction index, so we must flip the + // index. We can't put this into a helper method + // due to borrowck issues because parts of + // `self` are borrowed mutably elsewhere in this + // function. + let index = iix.to_backwards_insn_index(self.num_insts()); + let user_stack_map = self.user_stack_maps.remove(&index); + let user_stack_map_disasm = + user_stack_map.as_ref().map(|m| format!(" ; {m:?}")); + (user_stack_map, user_stack_map_disasm) + }; + + state.pre_safepoint(stack_map, user_stack_map); + + user_stack_map_disasm + } else { + None + }; // If the instruction we are about to emit is // a return, place an epilogue at this point @@ -932,6 +1002,10 @@ impl VCode { &mut buffer, &mut state, ); + if let Some(stack_map_disasm) = stack_map_disasm { + disasm.push_str(&stack_map_disasm); + disasm.push('\n'); + } } } @@ -1014,6 +1088,12 @@ impl VCode { } } + debug_assert!( + self.user_stack_maps.is_empty(), + "any stack maps should have been consumed by instruction emission, still have: {:#?}", + self.user_stack_maps, + ); + // Do any optimizations on branches at tail of buffer, as if we had // bound one last label. buffer.optimize_branches(ctrl_plane); @@ -1224,6 +1304,12 @@ impl VCode { .map(|o| o.vreg()) .any(|vreg| self.facts[vreg.vreg()].is_some()) } + + /// Get the user stack map associated with the given forward instruction index. + pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> { + let index = inst.to_backwards_insn_index(self.num_insts()); + self.user_stack_maps.get(&index) + } } impl std::ops::Index for VCode { @@ -1385,6 +1471,9 @@ impl fmt::Debug for VCode { } } } + if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) { + writeln!(f, " {user_stack_map:?}")?; + } } } diff --git a/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif new file mode 100644 index 000000000000..5c1d93b6235d --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif @@ -0,0 +1,221 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target aarch64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x24, [sp, #-16]! +; stp x19, x22, [sp, #-16]! +; sub sp, sp, #16 +; block0: +; movz w9, #0 +; movz w8, #1 +; movz w0, #2 +; mov x10, sp +; str w9, [x10] +; mov x24, x9 +; add x9, sp, #4 +; str w8, [x9] +; mov x19, x8 +; add x10, sp, #8 +; str w0, [x10] +; mov x22, x0 +; mov x0, x24 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; mov x12, sp +; mov x0, x19 +; str w0, [x12] +; add x13, sp, #4 +; mov x0, x22 +; str w0, [x13] +; mov x22, x0 +; mov x0, x24 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; mov x15, sp +; mov x0, x22 +; str w0, [x15] +; mov x0, x19 +; bl 0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; mov x0, x22 +; bl 0 +; add sp, sp, #16 +; ldp x19, x22, [sp], #16 +; ldr x24, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; str x24, [sp, #-0x10]! +; stp x19, x22, [sp, #-0x10]! +; sub sp, sp, #0x10 +; block1: ; offset 0x14 +; mov w9, #0 +; mov w8, #1 +; mov w0, #2 +; mov x10, sp +; str w9, [x10] +; mov x24, x9 +; add x9, sp, #4 +; str w8, [x9] +; mov x19, x8 +; add x10, sp, #8 +; str w0, [x10] +; mov x22, x0 +; mov x0, x24 +; bl #0x48 ; reloc_external Call u0:0 0 +; mov x12, sp +; mov x0, x19 +; str w0, [x12] +; add x13, sp, #4 +; mov x0, x22 +; str w0, [x13] +; mov x22, x0 +; mov x0, x24 +; bl #0x6c ; reloc_external Call u0:0 0 +; mov x15, sp +; mov x0, x22 +; str w0, [x15] +; mov x0, x19 +; bl #0x80 ; reloc_external Call u0:0 0 +; mov x0, x22 +; bl #0x88 ; reloc_external Call u0:0 0 +; add sp, sp, #0x10 +; ldp x19, x22, [sp], #0x10 +; ldr x24, [sp], #0x10 +; ldp x29, x30, [sp], #0x10 +; ret + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x23, x27, [sp, #-16]! +; stp x20, x21, [sp, #-16]! +; sub sp, sp, #128 +; block0: +; mov x12, sp +; strb w0, [x12] +; mov x23, x0 +; add x13, sp, #8 +; strh w1, [x13] +; mov x20, x1 +; add x14, sp, #16 +; str w2, [x14] +; mov x21, x2 +; add x15, sp, #20 +; str s0, [x15] +; str q0, [sp, #96] +; add x0, sp, #24 +; str x3, [x0] +; mov x27, x3 +; add x1, sp, #32 +; str d1, [x1] +; str q1, [sp, #112] +; bl 0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; mov x0, x23 +; mov x1, x20 +; mov x2, x21 +; mov x3, x27 +; ldr q0, [sp, #96] +; ldr q1, [sp, #112] +; add sp, sp, #128 +; ldp x20, x21, [sp], #16 +; ldp x23, x27, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; stp x23, x27, [sp, #-0x10]! +; stp x20, x21, [sp, #-0x10]! +; sub sp, sp, #0x80 +; block1: ; offset 0x14 +; mov x12, sp +; strb w0, [x12] +; mov x23, x0 +; add x13, sp, #8 +; strh w1, [x13] +; mov x20, x1 +; add x14, sp, #0x10 +; str w2, [x14] +; mov x21, x2 +; add x15, sp, #0x14 +; str s0, [x15] +; stur q0, [sp, #0x60] +; add x0, sp, #0x18 +; str x3, [x0] +; mov x27, x3 +; add x1, sp, #0x20 +; str d1, [x1] +; stur q1, [sp, #0x70] +; bl #0x5c ; reloc_external Call u0:0 0 +; mov x0, x23 +; mov x1, x20 +; mov x2, x21 +; mov x3, x27 +; ldur q0, [sp, #0x60] +; ldur q1, [sp, #0x70] +; add sp, sp, #0x80 +; ldp x20, x21, [sp], #0x10 +; ldp x23, x27, [sp], #0x10 +; ldp x29, x30, [sp], #0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif new file mode 100644 index 000000000000..b265508b9622 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif @@ -0,0 +1,250 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target riscv64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; addi sp,sp,-48 +; sd s1,40(sp) +; sd s2,32(sp) +; sd s3,24(sp) +; block0: +; li a0,0 +; mv s3,a0 +; li a0,1 +; li a2,2 +; sw zero,0(slot) +; sw a0,4(slot) +; mv s1,a0 +; sw a2,8(slot) +; mv s2,a2 +; mv a0,s3 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; mv a0,s1 +; sw a0,0(slot) +; mv a0,s2 +; sw a0,4(slot) +; mv s2,a0 +; mv a0,s3 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; mv a0,s2 +; sw a0,0(slot) +; mv a0,s1 +; call userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; mv a0,s2 +; call userextname0 +; ld s1,40(sp) +; ld s2,32(sp) +; ld s3,24(sp) +; addi sp,sp,48 +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; addi sp, sp, -0x30 +; sd s1, 0x28(sp) +; sd s2, 0x20(sp) +; sd s3, 0x18(sp) +; block1: ; offset 0x20 +; mv a0, zero +; mv s3, a0 +; addi a0, zero, 1 +; addi a2, zero, 2 +; sw zero, 0(sp) +; sw a0, 4(sp) +; mv s1, a0 +; sw a2, 8(sp) +; mv s2, a2 +; mv a0, s3 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s1 +; sw a0, 0(sp) +; mv a0, s2 +; sw a0, 4(sp) +; mv s2, a0 +; mv a0, s3 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s2 +; sw a0, 0(sp) +; mv a0, s1 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a0, s2 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; ld s1, 0x28(sp) +; ld s2, 0x20(sp) +; ld s3, 0x18(sp) +; addi sp, sp, 0x30 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; addi sp,sp,-160 +; sd s1,152(sp) +; sd s2,144(sp) +; sd s6,136(sp) +; sd s8,128(sp) +; sd s10,120(sp) +; fsd fs0,112(sp) +; fsd fs2,104(sp) +; block0: +; mv s10,a4 +; sb a0,0(slot) +; mv s8,a0 +; sh a1,8(slot) +; mv s6,a1 +; sw a2,16(slot) +; mv s2,a2 +; fsw fa0,20(slot) +; fmv.d fs2,fa0 +; sd a3,24(slot) +; mv s1,a3 +; fsd fa1,32(slot) +; fmv.d fs0,fa1 +; call userextname0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; mv a2,s2 +; mv a4,s10 +; sw a2,0(a4) +; mv a3,s1 +; sd a3,8(a4) +; mv a0,s8 +; mv a1,s6 +; fmv.d fa0,fs2 +; fmv.d fa1,fs0 +; ld s1,152(sp) +; ld s2,144(sp) +; ld s6,136(sp) +; ld s8,128(sp) +; ld s10,120(sp) +; fld fs0,112(sp) +; fld fs2,104(sp) +; addi sp,sp,160 +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; addi sp, sp, -0xa0 +; sd s1, 0x98(sp) +; sd s2, 0x90(sp) +; sd s6, 0x88(sp) +; sd s8, 0x80(sp) +; sd s10, 0x78(sp) +; fsd fs0, 0x70(sp) +; fsd fs2, 0x68(sp) +; block1: ; offset 0x30 +; mv s10, a4 +; sb a0, 0(sp) +; mv s8, a0 +; sh a1, 8(sp) +; mv s6, a1 +; sw a2, 0x10(sp) +; mv s2, a2 +; fsw fa0, 0x14(sp) +; fmv.d fs2, fa0 +; sd a3, 0x18(sp) +; mv s1, a3 +; fsd fa1, 0x20(sp) +; fmv.d fs0, fa1 +; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0 +; jalr ra +; mv a2, s2 +; mv a4, s10 +; sw a2, 0(a4) +; mv a3, s1 +; sd a3, 8(a4) +; mv a0, s8 +; mv a1, s6 +; fmv.d fa0, fs2 +; fmv.d fa1, fs0 +; ld s1, 0x98(sp) +; ld s2, 0x90(sp) +; ld s6, 0x88(sp) +; ld s8, 0x80(sp) +; ld s10, 0x78(sp) +; fld fs0, 0x70(sp) +; fld fs2, 0x68(sp) +; addi sp, sp, 0xa0 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif b/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif new file mode 100644 index 000000000000..911a93f61a97 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif @@ -0,0 +1,221 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target s390x + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; stmg %r7, %r15, 56(%r15) +; aghi %r15, -176 +; block0: +; lhi %r2, 0 +; lgr %r11, %r2 +; lhi %r2, 1 +; lgr %r7, %r2 +; lhi %r2, 2 +; lgr %r9, %r2 +; la %r2, 160(%r15) +; mvhi 0(%r2), 0 +; la %r3, 164(%r15) +; mvhi 0(%r3), 1 +; la %r4, 168(%r15) +; mvhi 0(%r4), 2 +; lgr %r2, %r11 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; la %r2, 160(%r15) +; mvhi 0(%r2), 1 +; la %r3, 164(%r15) +; mvhi 0(%r3), 2 +; lgr %r2, %r11 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; la %r5, 160(%r15) +; mvhi 0(%r5), 2 +; lgr %r2, %r7 +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; lgr %r2, %r9 +; brasl %r14, userextname0 +; lmg %r7, %r15, 232(%r15) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; stmg %r7, %r15, 0x38(%r15) +; aghi %r15, -0xb0 +; block1: ; offset 0xa +; lhi %r2, 0 +; lgr %r11, %r2 +; lhi %r2, 1 +; lgr %r7, %r2 +; lhi %r2, 2 +; lgr %r9, %r2 +; la %r2, 0xa0(%r15) +; mvhi 0(%r2), 0 +; la %r3, 0xa4(%r15) +; mvhi 0(%r3), 1 +; la %r4, 0xa8(%r15) +; mvhi 0(%r4), 2 +; lgr %r2, %r11 +; brasl %r14, 0x44 ; reloc_external PLTRel32Dbl u0:0 2 +; la %r2, 0xa0(%r15) +; mvhi 0(%r2), 1 +; la %r3, 0xa4(%r15) +; mvhi 0(%r3), 2 +; lgr %r2, %r11 +; brasl %r14, 0x62 ; reloc_external PLTRel32Dbl u0:0 2 +; la %r5, 0xa0(%r15) +; mvhi 0(%r5), 2 +; lgr %r2, %r7 +; brasl %r14, 0x76 ; reloc_external PLTRel32Dbl u0:0 2 +; lgr %r2, %r9 +; brasl %r14, 0x80 ; reloc_external PLTRel32Dbl u0:0 2 +; lmg %r7, %r15, 0xe8(%r15) +; br %r14 + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; stmg %r6, %r15, 48(%r15) +; aghi %r15, -352 +; std %f8, 288(%r15) +; std %f9, 296(%r15) +; std %f10, 304(%r15) +; std %f11, 312(%r15) +; std %f12, 320(%r15) +; std %f13, 328(%r15) +; std %f14, 336(%r15) +; std %f15, 344(%r15) +; block0: +; la %r9, 160(%r15) +; stc %r2, 0(%r9) +; lgr %r11, %r2 +; la %r2, 168(%r15) +; sth %r3, 0(%r2) +; lgr %r9, %r3 +; la %r2, 176(%r15) +; st %r4, 0(%r2) +; lgr %r7, %r4 +; la %r2, 180(%r15) +; ste %f0, 0(%r2) +; vst %v0, 256(%r15) +; la %r2, 184(%r15) +; stg %r5, 0(%r2) +; lgr %r6, %r5 +; la %r3, 192(%r15) +; std %f2, 0(%r3) +; vst %v2, 272(%r15) +; brasl %r14, userextname0 +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; lgr %r2, %r11 +; lgr %r3, %r9 +; lgr %r4, %r7 +; lgr %r5, %r6 +; vl %v0, 256(%r15) +; vl %v2, 272(%r15) +; ld %f8, 288(%r15) +; ld %f9, 296(%r15) +; ld %f10, 304(%r15) +; ld %f11, 312(%r15) +; ld %f12, 320(%r15) +; ld %f13, 328(%r15) +; ld %f14, 336(%r15) +; ld %f15, 344(%r15) +; lmg %r6, %r15, 400(%r15) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; aghi %r15, -0x160 +; std %f8, 0x120(%r15) +; std %f9, 0x128(%r15) +; std %f10, 0x130(%r15) +; std %f11, 0x138(%r15) +; std %f12, 0x140(%r15) +; std %f13, 0x148(%r15) +; std %f14, 0x150(%r15) +; std %f15, 0x158(%r15) +; block1: ; offset 0x2a +; la %r9, 0xa0(%r15) +; stc %r2, 0(%r9) +; lgr %r11, %r2 +; la %r2, 0xa8(%r15) +; sth %r3, 0(%r2) +; lgr %r9, %r3 +; la %r2, 0xb0(%r15) +; st %r4, 0(%r2) +; lgr %r7, %r4 +; la %r2, 0xb4(%r15) +; ste %f0, 0(%r2) +; vst %v0, 0x100(%r15) +; la %r2, 0xb8(%r15) +; stg %r5, 0(%r2) +; lgr %r6, %r5 +; la %r3, 0xc0(%r15) +; std %f2, 0(%r3) +; vst %v2, 0x110(%r15) +; brasl %r14, 0x78 ; reloc_external PLTRel32Dbl u0:0 2 +; lgr %r2, %r11 +; lgr %r3, %r9 +; lgr %r4, %r7 +; lgr %r5, %r6 +; vl %v0, 0x100(%r15) +; vl %v2, 0x110(%r15) +; ld %f8, 0x120(%r15) +; ld %f9, 0x128(%r15) +; ld %f10, 0x130(%r15) +; ld %f11, 0x138(%r15) +; ld %f12, 0x140(%r15) +; ld %f13, 0x148(%r15) +; ld %f14, 0x150(%r15) +; ld %f15, 0x158(%r15) +; lmg %r6, %r15, 0x190(%r15) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif b/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif new file mode 100644 index 000000000000..bddc779df2c7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/user_stack_maps.clif @@ -0,0 +1,241 @@ +test compile precise-output +set unwind_info=false +set enable_probestack=false +target x86_64 + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) + + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $48, %rsp +; movq %rbx, 16(%rsp) +; movq %r13, 24(%rsp) +; movq %r15, 32(%rsp) +; block0: +; xorl %edi, %edi, %edi +; movq %rdi, %r15 +; movl $1, %edi +; movq %rdi, %rbx +; movl $2, %edi +; movq %rdi, %r13 +; lea rsp(0 + virtual offset), %r11 +; movl $0, 0(%r11) +; lea rsp(4 + virtual offset), %rsi +; movl $1, 0(%rsi) +; lea rsp(8 + virtual offset), %rdi +; movl $2, 0(%rdi) +; movq %r15, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] } +; lea rsp(0 + virtual offset), %rcx +; movl $1, 0(%rcx) +; lea rsp(4 + virtual offset), %rdx +; movl $2, 0(%rdx) +; movq %r15, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] } +; lea rsp(0 + virtual offset), %r9 +; movl $2, 0(%r9) +; movq %rbx, %rdi +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] } +; movq %r13, %rdi +; call User(userextname0) +; movq 16(%rsp), %rbx +; movq 24(%rsp), %r13 +; movq 32(%rsp), %r15 +; addq %rsp, $48, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x30, %rsp +; movq %rbx, 0x10(%rsp) +; movq %r13, 0x18(%rsp) +; movq %r15, 0x20(%rsp) +; block1: ; offset 0x17 +; xorl %edi, %edi +; movq %rdi, %r15 +; movl $1, %edi +; movq %rdi, %rbx +; movl $2, %edi +; movq %rdi, %r13 +; leaq (%rsp), %r11 +; movl $0, (%r11) +; leaq 4(%rsp), %rsi +; movl $1, (%rsi) +; leaq 8(%rsp), %rdi +; movl $2, (%rdi) +; movq %r15, %rdi +; callq 0x55 ; reloc_external CallPCRel4 u0:0 -4 +; leaq (%rsp), %rcx +; movl $1, (%rcx) +; leaq 4(%rsp), %rdx +; movl $2, (%rdx) +; movq %r15, %rdi +; callq 0x72 ; reloc_external CallPCRel4 u0:0 -4 +; leaq (%rsp), %r9 +; movl $2, (%r9) +; movq %rbx, %rdi +; callq 0x85 ; reloc_external CallPCRel4 u0:0 -4 +; movq %r13, %rdi +; callq 0x8d ; reloc_external CallPCRel4 u0:0 -4 +; movq 0x10(%rsp), %rbx +; movq 0x18(%rsp), %r13 +; movq 0x20(%rsp), %r15 +; addq $0x30, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v4, ss2+4 + stack_store v3, ss3 + stack_store v5, ss3+8 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8] + return v0, v1, v2, v3, v4, v5 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $176, %rsp +; movq %rbx, 128(%rsp) +; movq %r12, 136(%rsp) +; movq %r13, 144(%rsp) +; movq %r14, 152(%rsp) +; movq %r15, 160(%rsp) +; block0: +; movq %r8, %r13 +; lea rsp(0 + virtual offset), %r8 +; movb %dil, 0(%r8) +; movq %rdi, %rbx +; lea rsp(8 + virtual offset), %r8 +; movw %si, 0(%r8) +; movq %rsi, %r14 +; lea rsp(16 + virtual offset), %r9 +; movl %edx, 0(%r9) +; movq %rdx, %r12 +; lea rsp(20 + virtual offset), %r10 +; movss %xmm0, 0(%r10) +; movdqu %xmm0, rsp(96 + virtual offset) +; lea rsp(24 + virtual offset), %r11 +; movq %rcx, 0(%r11) +; movq %rcx, %r15 +; lea rsp(32 + virtual offset), %rsi +; movsd %xmm1, 0(%rsi) +; movdqu %xmm1, rsp(112 + virtual offset) +; call User(userextname0) +; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] } +; movq %r12, %rdx +; movq %r13, %r8 +; movl %edx, 0(%r8) +; movq %r15, %rcx +; movq %rcx, 8(%r8) +; movq %rbx, %rax +; movq %r14, %rdx +; movdqu rsp(96 + virtual offset), %xmm0 +; movdqu rsp(112 + virtual offset), %xmm1 +; movq 128(%rsp), %rbx +; movq 136(%rsp), %r12 +; movq 144(%rsp), %r13 +; movq 152(%rsp), %r14 +; movq 160(%rsp), %r15 +; addq %rsp, $176, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0xb0, %rsp +; movq %rbx, 0x80(%rsp) +; movq %r12, 0x88(%rsp) +; movq %r13, 0x90(%rsp) +; movq %r14, 0x98(%rsp) +; movq %r15, 0xa0(%rsp) +; block1: ; offset 0x33 +; movq %r8, %r13 +; leaq (%rsp), %r8 +; movb %dil, (%r8) +; movq %rdi, %rbx +; leaq 8(%rsp), %r8 +; movw %si, (%r8) +; movq %rsi, %r14 +; leaq 0x10(%rsp), %r9 +; movl %edx, (%r9) +; movq %rdx, %r12 +; leaq 0x14(%rsp), %r10 +; movss %xmm0, (%r10) +; movdqu %xmm0, 0x60(%rsp) +; leaq 0x18(%rsp), %r11 +; movq %rcx, (%r11) +; movq %rcx, %r15 +; leaq 0x20(%rsp), %rsi +; movsd %xmm1, (%rsi) +; movdqu %xmm1, 0x70(%rsp) +; callq 0x86 ; reloc_external CallPCRel4 u0:0 -4 +; movq %r12, %rdx +; movq %r13, %r8 +; movl %edx, (%r8) +; movq %r15, %rcx +; movq %rcx, 8(%r8) +; movq %rbx, %rax +; movq %r14, %rdx +; movdqu 0x60(%rsp), %xmm0 +; movdqu 0x70(%rsp), %xmm1 +; movq 0x80(%rsp), %rbx +; movq 0x88(%rsp), %r12 +; movq 0x90(%rsp), %r13 +; movq 0x98(%rsp), %r14 +; movq 0xa0(%rsp), %r15 +; addq $0xb0, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq +