From e4f3c166285d28a2942784628b676833f0162027 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 17 Mar 2024 17:42:37 -0400 Subject: [PATCH] Omit non-needs_drop drop_in_place in vtables This replaces the drop_in_place reference with null in vtables. On librustc_driver.so, this drops about ~17k dynamic relocations from the output, since many vtables can now be placed in read-only memory, rather than having a relocated pointer included. This makes a tradeoff by adding a null check at vtable call sites. I'm not sure that's readily avoidable without changing the vtable format (e.g., so that we can use a pc-relative relocation instead of an absolute address, and avoid the dynamic relocation that way). But it seems likely that the check is cheap at runtime. --- compiler/rustc_codegen_ssa/src/meth.rs | 15 +- compiler/rustc_codegen_ssa/src/mir/block.rs | 203 +++++++++++--------- compiler/rustc_middle/src/ty/vtable.rs | 12 +- tests/codegen/vtable-loads.rs | 16 ++ 4 files changed, 152 insertions(+), 94 deletions(-) create mode 100644 tests/codegen/vtable-loads.rs diff --git a/compiler/rustc_codegen_ssa/src/meth.rs b/compiler/rustc_codegen_ssa/src/meth.rs index 4f7dc9968a13c..daddbb5646d50 100644 --- a/compiler/rustc_codegen_ssa/src/meth.rs +++ b/compiler/rustc_codegen_ssa/src/meth.rs @@ -13,7 +13,7 @@ impl<'a, 'tcx> VirtualIndex { VirtualIndex(index as u64) } - pub fn get_fn>( + pub fn get_optional_fn>( self, bx: &mut Bx, llvtable: Bx::Value, @@ -39,13 +39,24 @@ impl<'a, 'tcx> VirtualIndex { } else { let gep = bx.inbounds_ptradd(llvtable, bx.const_usize(vtable_byte_offset)); let ptr = bx.load(llty, gep, ptr_align); - bx.nonnull_metadata(ptr); // VTable loads are invariant. bx.set_invariant_load(ptr); ptr } } + pub fn get_fn>( + self, + bx: &mut Bx, + llvtable: Bx::Value, + ty: Ty<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + ) -> Bx::Value { + let ptr = self.get_optional_fn(bx, llvtable, ty, fn_abi); + bx.nonnull_metadata(ptr); + ptr + } + pub fn get_usize>( self, bx: &mut Bx, diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index d4d172c000f3f..2c7f6c2222a10 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -498,6 +498,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { &mut self, helper: TerminatorCodegenHelper<'tcx>, bx: &mut Bx, + source_info: &mir::SourceInfo, location: mir::Place<'tcx>, target: mir::BasicBlock, unwind: mir::UnwindAction, @@ -521,90 +522,109 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args1 = [place.val.llval]; &args1[..] }; - let (drop_fn, fn_abi, drop_instance) = - match ty.kind() { - // FIXME(eddyb) perhaps move some of this logic into - // `Instance::resolve_drop_in_place`? - ty::Dynamic(_, _, ty::Dyn) => { - // IN THIS ARM, WE HAVE: - // ty = *mut (dyn Trait) - // which is: exists ( *mut T, Vtable ) - // args[0] args[1] - // - // args = ( Data, Vtable ) - // | - // v - // /-------\ - // | ... | - // \-------/ - // - let virtual_drop = Instance { - def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), - args: drop_fn.args, - }; - debug!("ty = {:?}", ty); - debug!("drop_fn = {:?}", drop_fn); - debug!("args = {:?}", args); - let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); - let vtable = args[1]; - // Truncate vtable off of args list - args = &args[..1]; - ( - meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) - .get_fn(bx, vtable, ty, fn_abi), - fn_abi, - virtual_drop, - ) - } - ty::Dynamic(_, _, ty::DynStar) => { - // IN THIS ARM, WE HAVE: - // ty = *mut (dyn* Trait) - // which is: *mut exists (T, Vtable) - // - // args = [ * ] - // | - // v - // ( Data, Vtable ) - // | - // v - // /-------\ - // | ... | - // \-------/ - // - // - // WE CAN CONVERT THIS INTO THE ABOVE LOGIC BY DOING - // - // data = &(*args[0]).0 // gives a pointer to Data above (really the same pointer) - // vtable = (*args[0]).1 // loads the vtable out - // (data, vtable) // an equivalent Rust `*mut dyn Trait` - // - // SO THEN WE CAN USE THE ABOVE CODE. - let virtual_drop = Instance { - def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), - args: drop_fn.args, - }; - debug!("ty = {:?}", ty); - debug!("drop_fn = {:?}", drop_fn); - debug!("args = {:?}", args); - let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); - let meta_ptr = place.project_field(bx, 1); - let meta = bx.load_operand(meta_ptr); - // Truncate vtable off of args list - args = &args[..1]; - debug!("args' = {:?}", args); - ( - meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) - .get_fn(bx, meta.immediate(), ty, fn_abi), - fn_abi, - virtual_drop, - ) - } - _ => ( - bx.get_fn_addr(drop_fn), - bx.fn_abi_of_instance(drop_fn, ty::List::empty()), - drop_fn, - ), - }; + let (maybe_null, drop_fn, fn_abi, drop_instance) = match ty.kind() { + // FIXME(eddyb) perhaps move some of this logic into + // `Instance::resolve_drop_in_place`? + ty::Dynamic(_, _, ty::Dyn) => { + // IN THIS ARM, WE HAVE: + // ty = *mut (dyn Trait) + // which is: exists ( *mut T, Vtable ) + // args[0] args[1] + // + // args = ( Data, Vtable ) + // | + // v + // /-------\ + // | ... | + // \-------/ + // + let virtual_drop = Instance { + def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), + args: drop_fn.args, + }; + debug!("ty = {:?}", ty); + debug!("drop_fn = {:?}", drop_fn); + debug!("args = {:?}", args); + let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); + let vtable = args[1]; + // Truncate vtable off of args list + args = &args[..1]; + ( + true, + meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) + .get_optional_fn(bx, vtable, ty, fn_abi), + fn_abi, + virtual_drop, + ) + } + ty::Dynamic(_, _, ty::DynStar) => { + // IN THIS ARM, WE HAVE: + // ty = *mut (dyn* Trait) + // which is: *mut exists (T, Vtable) + // + // args = [ * ] + // | + // v + // ( Data, Vtable ) + // | + // v + // /-------\ + // | ... | + // \-------/ + // + // + // WE CAN CONVERT THIS INTO THE ABOVE LOGIC BY DOING + // + // data = &(*args[0]).0 // gives a pointer to Data above (really the same pointer) + // vtable = (*args[0]).1 // loads the vtable out + // (data, vtable) // an equivalent Rust `*mut dyn Trait` + // + // SO THEN WE CAN USE THE ABOVE CODE. + let virtual_drop = Instance { + def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), + args: drop_fn.args, + }; + debug!("ty = {:?}", ty); + debug!("drop_fn = {:?}", drop_fn); + debug!("args = {:?}", args); + let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); + let meta_ptr = place.project_field(bx, 1); + let meta = bx.load_operand(meta_ptr); + // Truncate vtable off of args list + args = &args[..1]; + debug!("args' = {:?}", args); + ( + true, + meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) + .get_optional_fn(bx, meta.immediate(), ty, fn_abi), + fn_abi, + virtual_drop, + ) + } + _ => ( + false, + bx.get_fn_addr(drop_fn), + bx.fn_abi_of_instance(drop_fn, ty::List::empty()), + drop_fn, + ), + }; + + // We generate a null check for the drop_fn. This saves a bunch of relocations being + // generated for no-op drops. + if maybe_null { + let is_not_null = bx.append_sibling_block("is_not_null"); + let llty = bx.fn_ptr_backend_type(fn_abi); + let null = bx.const_null(llty); + let non_null = bx.icmp( + base::bin_op_to_icmp_predicate(mir::BinOp::Ne.to_hir_binop(), false), + drop_fn, + null, + ); + bx.cond_br(non_null, is_not_null, self.llbb(target)); + bx.switch_to_block(is_not_null); + self.set_debug_loc(bx, *source_info); + } + helper.do_call( self, bx, @@ -615,7 +635,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { unwind, &[], Some(drop_instance), - mergeable_succ, + !maybe_null && mergeable_succ, ) } @@ -1344,9 +1364,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { MergingSucc::False } - mir::TerminatorKind::Drop { place, target, unwind, replace: _ } => { - self.codegen_drop_terminator(helper, bx, place, target, unwind, mergeable_succ()) - } + mir::TerminatorKind::Drop { place, target, unwind, replace: _ } => self + .codegen_drop_terminator( + helper, + bx, + &terminator.source_info, + place, + target, + unwind, + mergeable_succ(), + ), mir::TerminatorKind::Assert { ref cond, expected, ref msg, target, unwind } => self .codegen_assert_terminator( diff --git a/compiler/rustc_middle/src/ty/vtable.rs b/compiler/rustc_middle/src/ty/vtable.rs index 62f41921d888a..dcfa7d172321e 100644 --- a/compiler/rustc_middle/src/ty/vtable.rs +++ b/compiler/rustc_middle/src/ty/vtable.rs @@ -83,10 +83,14 @@ pub(super) fn vtable_allocation_provider<'tcx>( let idx: u64 = u64::try_from(idx).unwrap(); let scalar = match entry { VtblEntry::MetadataDropInPlace => { - let instance = ty::Instance::resolve_drop_in_place(tcx, ty); - let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); - let fn_ptr = Pointer::from(fn_alloc_id); - Scalar::from_pointer(fn_ptr, &tcx) + if ty.needs_drop(tcx, ty::ParamEnv::reveal_all()) { + let instance = ty::Instance::resolve_drop_in_place(tcx, ty); + let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); + let fn_ptr = Pointer::from(fn_alloc_id); + Scalar::from_pointer(fn_ptr, &tcx) + } else { + Scalar::from_maybe_pointer(Pointer::null(), &tcx) + } } VtblEntry::MetadataSize => Scalar::from_uint(size, ptr_size), VtblEntry::MetadataAlign => Scalar::from_uint(align, ptr_size), diff --git a/tests/codegen/vtable-loads.rs b/tests/codegen/vtable-loads.rs new file mode 100644 index 0000000000000..1dd6ca51063b1 --- /dev/null +++ b/tests/codegen/vtable-loads.rs @@ -0,0 +1,16 @@ +//@ compile-flags: -O + +#![crate_type = "lib"] + +// CHECK-LABEL: @loop_skips_vtable_load +#[no_mangle] +pub fn loop_skips_vtable_load(x: &dyn Fn()) { + // CHECK: load ptr, ptr %0{{.*}}, !invariant.load + // CHECK-NEXT: tail call void %1 + // CHECK-NOT: load ptr + x(); + for _ in 0..100 { + // CHECK: tail call void %1 + x(); + } +}