Use relative call instructions between wasm functions #3254

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -19,6 +19,36 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
    }
}

/// Generates the four instructions necessary for a small "jump veneer" which
/// is used when relative 26-bit call instructions won't cut it and a longer
/// jump is needed.
///
/// This generates:
///
/// ```ignore
/// ldr x16, 16
/// adr x17, 12
/// add x16, x16, x17
/// br x16
/// ```
///
/// and the expectation is that the 8-byte immediate used to compute the
/// jump target is placed immediately after these four instructions.
///
/// Note that this is part of the `MachBackend::generate_jump_veneer` contract.
pub fn gen_jump_veneer() -> (u32, u32, u32, u32) {
    (
        // ldr x16, 16
        enc_ldst_imm19(0b01011000, 16 / 4, xreg(16)),
        // adr x17, 12
        enc_adr(12, writable_xreg(17)),
        // add x16, x16, x17
        enc_arith_rrr(0b10001011_000, 0, writable_xreg(16), xreg(16), xreg(17)),
        // br x16
        enc_br(xreg(16)),
    )
}
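
As a quick illustration (not part of this change), the arithmetic behind the literal offsets in the veneer, assuming it is emitted starting at offset 0:

```rust
// Each AArch64 instruction is 4 bytes, so the 8-byte immediate that follows
// the four instructions lives at byte offset 16.
let imm_offset = 4 * 4;
// `ldr x16, 16` sits at offset 0 and loads the immediate from `pc + 16`.
assert_eq!(0 + 16, imm_offset);
// `adr x17, 12` sits at offset 4 and computes `pc + 12`, the immediate's own
// address; the `add`/`br` pair then jumps to `immediate + its address`.
assert_eq!(4 + 12, imm_offset);
```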

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/aarch64/mod.rs
@@ -161,6 +161,23 @@ impl MachBackend for AArch64Backend {
    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
        Some(inst::unwind::systemv::create_cie())
    }

    fn max_jump_veneer_size(&self) -> usize {
        24 // 4 insns + 8-byte immediate
    }

    fn generate_jump_veneer(&self) -> (Vec<u8>, usize) {
        let (a, b, c, d) = inst::emit::gen_jump_veneer();
        let mut bytes = Vec::with_capacity(self.max_jump_veneer_size());
        bytes.extend_from_slice(&a.to_le_bytes());
        bytes.extend_from_slice(&b.to_le_bytes());
        bytes.extend_from_slice(&c.to_le_bytes());
        bytes.extend_from_slice(&d.to_le_bytes());
        let imm_start = bytes.len();
        bytes.extend_from_slice(&[0x00; 8]);
        assert_eq!(bytes.len(), self.max_jump_veneer_size());
        (bytes, imm_start)
    }
}
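
As an illustrative check (not in this diff) of the layout built above, assuming some `backend: AArch64Backend` value:

```rust
// Sketch: the veneer is four little-endian 32-bit instructions followed by a
// zeroed 8-byte slot for the immediate.
let (bytes, imm_start) = backend.generate_jump_veneer();
assert_eq!(bytes.len(), backend.max_jump_veneer_size()); // 24 bytes total
assert_eq!(imm_start, 16); // the immediate slot follows the 4 instructions
assert!(bytes[imm_start..].iter().all(|b| *b == 0));
```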

/// Create a new `isa::Builder`.
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/arm32/mod.rs
@@ -115,6 +115,14 @@ impl MachBackend for Arm32Backend {
        // Carry flag clear.
        IntCC::UnsignedLessThan
    }

    fn generate_jump_veneer(&self) -> (Vec<u8>, usize) {
        panic!("not implemented for arm32 backend")
    }

    fn max_jump_veneer_size(&self) -> usize {
        0
    }
}

/// Create a new `isa::Builder`.
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/s390x/mod.rs
@@ -165,6 +165,17 @@ impl MachBackend for S390xBackend {
    fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
        inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
    }

    fn max_jump_veneer_size(&self) -> usize {
        0
    }

    fn generate_jump_veneer(&self) -> (Vec<u8>, usize) {
        panic!(
            "jumps >= 2gb are not implemented yet on s390x, functions are \
             too far apart to have a relative call between them"
        );
    }
}

/// Create a new `isa::Builder`.
57 changes: 54 additions & 3 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -4,9 +4,9 @@ use crate::ir::LibCall;
use crate::ir::TrapCode;
use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength};
use crate::isa::x64::encoding::rex::{
-    emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
-    low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, OpcodeMap,
-    RexFlags,
+    emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg,
+    encode_modrm, int_reg_enc, low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc,
+    LegacyPrefixes, OpcodeMap, RexFlags,
};
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
@@ -56,6 +56,57 @@ fn emit_reloc(
    sink.add_reloc(srcloc, kind, name, addend);
}

/// Generates the instructions necessary for a small "jump veneer" which is
/// used when relative 32-bit call instructions won't cut it and a longer jump
/// is needed.
///
/// This generates:
///
/// ```ignore
/// movabsq $val, %r10
/// lea -15(%rip), %r11
/// add %r10, %r11
/// jmpq *%r11
/// ```
///
/// The `-15` in the `lea` computes the address of the `movabsq` immediate:
/// the instruction pointer after the 7-byte `lea` sits at offset 17 from the
/// veneer's start, and `17 - 15 = 2` is the immediate's byte offset. The
/// `add` then forms `immediate + immediate's address`, i.e. a relative jump.
///
/// Note that this is part of the `MachBackend::generate_jump_veneer` contract.
pub fn gen_jump_veneer() -> (Vec<u8>, usize) {
    let mut bytes = Vec::with_capacity(jump_veneer_size());

    let r10 = int_reg_enc(regs::r10());
    let r11 = int_reg_enc(regs::r11());

    // movabsq $val, %r10
    bytes.push(0x48 | ((r10 >> 3) & 1));
    bytes.push(0xB8 | (r10 & 7));
    let imm_pos = bytes.len();
    bytes.extend_from_slice(&[0; 8]);

    // lea -15(%rip), %r11
    bytes.push(0x48 | (((r11 >> 3) & 1) << 2));
    bytes.push(0x8d);
    bytes.push(encode_modrm(0b00, r11 & 7, 0b101));
    bytes.extend_from_slice(&i32::to_le_bytes(-15));

    // add %r10, %r11 (REX.R extends the modrm `reg` field, %r10, and REX.B
    // the `rm` field, %r11; both happen to be extended registers here)
    bytes.push(0x48 | (((r10 >> 3) & 1) << 2) | ((r11 >> 3) & 1));
    bytes.push(0x01);
    bytes.push(encode_modrm(0b11, r10 & 7, r11 & 7));

    // jmpq *%r11
    bytes.push(0x40 | ((r11 >> 3) & 1));
    bytes.push(0xff);
    bytes.push(0xe0 | (r11 & 7));

    assert_eq!(bytes.len(), jump_veneer_size());
    (bytes, imm_pos)
}

/// See `gen_jump_veneer`.
pub fn jump_veneer_size() -> usize {
    23
}
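
As a cross-check of the byte layout (a sketch, not part of this change): `movabsq` is 2 + 8 bytes, `lea` is 7, `add` is 3, and `jmpq` is 3, totalling 23, with the immediate starting right after the two `movabsq` prefix/opcode bytes:

```rust
#[test]
fn jump_veneer_layout() {
    let (bytes, imm_pos) = gen_jump_veneer();
    assert_eq!(bytes.len(), jump_veneer_size()); // 23 bytes
    assert_eq!(imm_pos, 2); // immediate follows the REX prefix and opcode
    // The 8-byte immediate slot stays zeroed until it's patched.
    assert!(bytes[imm_pos..imm_pos + 8].iter().all(|b| *b == 0));
}
```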

/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -28,6 +28,8 @@ pub mod unwind;
use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};

pub use emit::{gen_jump_veneer, jump_veneer_size};

//=============================================================================
// Instructions (top level): definition

8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/x64/mod.rs
@@ -158,6 +158,14 @@ impl MachBackend for X64Backend {
    fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
        inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
    }

    fn max_jump_veneer_size(&self) -> usize {
        inst::jump_veneer_size()
    }

    fn generate_jump_veneer(&self) -> (Vec<u8>, usize) {
        inst::gen_jump_veneer()
    }
}

/// Create a new `isa::Builder`.
28 changes: 28 additions & 0 deletions cranelift/codegen/src/machinst/mod.rs
@@ -425,6 +425,34 @@ pub trait MachBackend {
    fn map_reg_to_dwarf(&self, _: Reg) -> Result<u16, RegisterMappingError> {
        Err(RegisterMappingError::UnsupportedArchitecture)
    }

    /// Generates a "veneer" which is used when a relative call instruction
    /// cannot reach its destination.
    ///
    /// Cranelift compiles wasm modules on a per-function basis, entirely
    /// isolated from all other functions. Ideally, functions also use
    /// relative calls between one another, both to avoid relocation fixups
    /// when a module is loaded and to make calls statically more predictable.
    /// These jumps, however, may not always be able to reach their
    /// destination, depending on the final layout of the executable.
    ///
    /// This function generates an executable code sequence which can be used
    /// to jump to an arbitrary pointer-sized immediate. It is only invoked
    /// when functions are too far apart to call each other with relative
    /// call instructions.
    ///
    /// The first element of the return value is the machine code of the
    /// veneer, and the second is the offset within the veneer at which an
    /// 8-byte immediate describing the target destination must be written.
    /// When jumped to, the veneer adds that 8-byte immediate to the address
    /// of the immediate itself and jumps to the result. In other words, the
    /// veneer performs a relative jump to the final destination, using a
    /// pointer-sized immediate as the displacement.
    fn generate_jump_veneer(&self) -> (Vec<u8>, usize);

    /// Returns the maximal size of the veneer returned by
    /// `generate_jump_veneer`.
    fn max_jump_veneer_size(&self) -> usize;
}

Member commented on `generate_jump_veneer`:

Two thoughts on this new addition to the backend trait:

• I think there might be an opportunity to merge this per-backend behavior into the `LabelUse` trait already defined by each backend. The generated trampoline code is playing almost the same role -- extending the range of a branch and allowing a shorter-range reloc to be converted to a longer-range one -- except that, unlike intra-function veneers, it also assumes the use of various registers.

  That last bit is a key difference. In the aarch64 case, x16 and x17 are used internally in instruction-lowering sequences but never across basic blocks, so this is fine; but r10 and r11 on x86-64 will potentially be used by regalloc, so we wouldn't want to blindly insert this as another type of veneer in the existing trait. We'd want to add some parameters to the `supports_veneer`, `veneer_size`, and `generate_veneer` functions to indicate what kind of veneer is requested ("next step up in range" or "absolute", maybe) and whether using registers as per the ABI for inter-function transfers is allowed.

  Whatever we do, it strikes me that the duplication here ("veneers" in two places, with similar APIs) is likely to confuse others, so we should somehow merge them or distinguish them better. Furthermore, if we're going to have low-level knowledge of branches (e.g. embedded machine-code bits in a sequence to emit), we should have that in as few places as possible.

• I am wondering if there is a better name than "veneer", if we don't merge; maybe "trampoline" or "linkage stub"?

Contributor commented:

This function could be useful for cranelift-jit too. It also needs a version that loads the address from memory at a specific location, like ELF PLTs do. Currently cranelift-jit only has an x86_64 implementation of the latter.
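
To make the patching side of this contract concrete, here is an illustrative sketch (not part of this change) of how a linking step might fill in the immediate; `veneer_addr` and `target` are hypothetical inputs:

```rust
/// Sketch only: write the displacement into a veneer produced by
/// `MachBackend::generate_jump_veneer`.
fn patch_jump_veneer(bytes: &mut [u8], imm_offset: usize, veneer_addr: u64, target: u64) {
    // The veneer jumps to `immediate + address-of-immediate`, so the stored
    // immediate is the distance from the immediate itself to the target.
    let imm_addr = veneer_addr + imm_offset as u64;
    let imm = target.wrapping_sub(imm_addr);
    bytes[imm_offset..imm_offset + 8].copy_from_slice(&imm.to_le_bytes());
}
```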

/// Expected unwind info type.
28 changes: 27 additions & 1 deletion crates/cranelift/src/builder.rs
@@ -13,6 +13,20 @@ use wasmtime_environ::{CompilerBuilder, Setting, SettingKind};
struct Builder {
    flags: settings::Builder,
    isa_flags: isa::Builder,
    linkopts: LinkOptions,
}

#[derive(Clone, Default)]
pub struct LinkOptions {
    /// A debug-only setting used to synthetically insert 0-byte padding
    /// between compiled functions to simulate huge compiled artifacts and
    /// exercise logic related to jump veneers.
    pub padding_between_functions: usize,

    /// A debug-only setting used to force inter-function calls in a wasm
    /// module to always go through "jump veneers", which are typically only
    /// generated when functions are very far from each other.
    pub force_jump_veneers: bool,
}

Member commented on `padding_between_functions`:

Do we still need this if we have the option below (force veneers) as well?

Member Author replied:

I'm going to leave this in for now because I think it's useful to exercise the veneers in real-world situations where they're actually needed. Otherwise I'd be worried that the logic for actually inserting veneers was only correct when the forcing was turned on. This way there's some exercising of the actual "ok yes we need that veneer" logic as well. (It's also relatively easy to support.)

Member replied:

Makes sense!

Would it make sense to use it only in tests with the aarch64 backend (which could be instantiated explicitly and doesn't need to run on an aarch64 host), where the threshold for inserting an island is much lower, so we don't have the overhead of 2GiB object files in tests on x86?

pub fn builder() -> Box<dyn CompilerBuilder> {
@@ -32,6 +46,7 @@ pub fn builder() -> Box<dyn CompilerBuilder> {
    Box::new(Builder {
        flags,
        isa_flags: cranelift_native::builder().expect("host machine is not a supported target"),
        linkopts: LinkOptions::default(),
    })
}

@@ -50,6 +65,17 @@ impl CompilerBuilder for Builder {
    }

    fn set(&mut self, name: &str, value: &str) -> Result<()> {
        // Special wasmtime-cranelift-only settings first
        if name == "wasmtime_linkopt_padding_between_functions" {
            self.linkopts.padding_between_functions = value.parse()?;
            return Ok(());
        }
        if name == "wasmtime_linkopt_force_jump_veneer" {
            self.linkopts.force_jump_veneers = value.parse()?;
            return Ok(());
        }

        // ... then forward this to Cranelift
        if let Err(err) = self.flags.set(name, value) {
            match err {
                SetError::BadName(_) => {
@@ -80,7 +106,7 @@
            .isa_flags
            .clone()
            .finish(settings::Flags::new(self.flags.clone()));
-        Box::new(crate::compiler::Compiler::new(isa))
+        Box::new(crate::compiler::Compiler::new(isa, self.linkopts.clone()))
    }

    fn settings(&self) -> Vec<Setting> {
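
For reference, a hedged usage sketch (not in this diff): the new debug-only options are toggled through the same `set` entry point as ordinary Cranelift flags. The surrounding wiring here is assumed:

```rust
use anyhow::Result;

fn configure_for_veneer_testing() -> Result<()> {
    let mut builder = builder(); // the `Box<dyn CompilerBuilder>` from above
    // Force every inter-function call through a jump veneer.
    builder.set("wasmtime_linkopt_force_jump_veneer", "true")?;
    // Insert a megabyte of zero padding between compiled functions.
    builder.set("wasmtime_linkopt_padding_between_functions", "1048576")?;
    Ok(())
}
```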