From 8ae5eadb22f378b6b1d277c4e7e978639b47838c Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Thu, 11 Jun 2020 17:52:09 +1200 Subject: [PATCH 1/2] [AVR] Correctly set the pointer address space when constructing pointers to functions This patch extends the existing `type_i8p` method so that it requires an explicit address space to be specified. Before this patch, the `type_i8p` method implicitly assumed the default address space, which is not a safe transformation on all targets, namely AVR. The Rust compiler already has support for tracking the "instruction address space" on a per-target basis. This patch extends the code generation routines so that an address space must always be specified. In my estimation, around 15% of the callers of `type_i8p` produced invalid code on AVR due to the loss of address space prior to LLVM final code generation. This would lead to unavoidable assertion errors relating to invalid bitcasts. With this patch, the address space is always either 1) explicitly set to the instruction address space because the logic is dealing with functions which must be placed there, or 2) explicitly set to the default address space 0 because the logic can only operate on data space pointers and thus we keep the existing semantics of assuming the default, "data" address space. 
--- src/librustc_codegen_llvm/abi.rs | 2 +- src/librustc_codegen_llvm/common.rs | 15 ++-- src/librustc_codegen_llvm/consts.rs | 12 ++- src/librustc_codegen_llvm/type_.rs | 16 ++-- src/librustc_codegen_ssa/meth.rs | 2 +- src/librustc_codegen_ssa/mir/mod.rs | 5 +- src/librustc_codegen_ssa/traits/type_.rs | 9 ++- src/librustc_target/abi/mod.rs | 17 +++- src/test/codegen/avr/avr-func-addrspace.rs | 93 ++++++++++++++++++++++ 9 files changed, 148 insertions(+), 23 deletions(-) create mode 100644 src/test/codegen/avr/avr-func-addrspace.rs diff --git a/src/librustc_codegen_llvm/abi.rs b/src/librustc_codegen_llvm/abi.rs index 099c402703d0..7857ccb613bf 100644 --- a/src/librustc_codegen_llvm/abi.rs +++ b/src/librustc_codegen_llvm/abi.rs @@ -366,7 +366,7 @@ impl<'tcx> FnAbiLlvmExt<'tcx> for FnAbi<'tcx, Ty<'tcx>> { unsafe { llvm::LLVMPointerType( self.llvm_type(cx), - cx.data_layout().instruction_address_space as c_uint, + cx.data_layout().instruction_address_space.0 as c_uint, ) } } diff --git a/src/librustc_codegen_llvm/common.rs b/src/librustc_codegen_llvm/common.rs index 64140747871f..0e1cd8e493d9 100644 --- a/src/librustc_codegen_llvm/common.rs +++ b/src/librustc_codegen_llvm/common.rs @@ -16,7 +16,7 @@ use rustc_middle::bug; use rustc_middle::mir::interpret::{Allocation, GlobalAlloc, Scalar}; use rustc_middle::ty::layout::TyAndLayout; use rustc_span::symbol::Symbol; -use rustc_target::abi::{self, HasDataLayout, LayoutOf, Pointer, Size}; +use rustc_target::abi::{self, AddressSpace, HasDataLayout, LayoutOf, Pointer, Size}; use libc::{c_char, c_uint}; use log::debug; @@ -244,7 +244,7 @@ impl ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> { } } Scalar::Ptr(ptr) => { - let base_addr = match self.tcx.global_alloc(ptr.alloc_id) { + let (base_addr, base_addr_space) = match self.tcx.global_alloc(ptr.alloc_id) { GlobalAlloc::Memory(alloc) => { let init = const_alloc_to_llvm(self, alloc); let value = match alloc.mutability { @@ -254,18 +254,21 @@ impl ConstMethods<'tcx> for 
CodegenCx<'ll, 'tcx> { if !self.sess().fewer_names() { llvm::set_value_name(value, format!("{:?}", ptr.alloc_id).as_bytes()); } - value + (value, AddressSpace::DATA) } - GlobalAlloc::Function(fn_instance) => self.get_fn_addr(fn_instance), + GlobalAlloc::Function(fn_instance) => ( + self.get_fn_addr(fn_instance), + self.data_layout().instruction_address_space, + ), GlobalAlloc::Static(def_id) => { assert!(self.tcx.is_static(def_id)); assert!(!self.tcx.is_thread_local_static(def_id)); - self.get_static(def_id) + (self.get_static(def_id), AddressSpace::DATA) } }; let llval = unsafe { llvm::LLVMConstInBoundsGEP( - self.const_bitcast(base_addr, self.type_i8p()), + self.const_bitcast(base_addr, self.type_i8p_ext(base_addr_space)), &self.const_usize(ptr.offset.bytes()), 1, ) diff --git a/src/librustc_codegen_llvm/consts.rs b/src/librustc_codegen_llvm/consts.rs index e8d475405096..3e9e5d9c8c1f 100644 --- a/src/librustc_codegen_llvm/consts.rs +++ b/src/librustc_codegen_llvm/consts.rs @@ -13,14 +13,14 @@ use rustc_hir::def_id::DefId; use rustc_hir::Node; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs}; use rustc_middle::mir::interpret::{ - read_target_uint, Allocation, ConstValue, ErrorHandled, Pointer, + read_target_uint, Allocation, ConstValue, ErrorHandled, GlobalAlloc, Pointer, }; use rustc_middle::mir::mono::MonoItem; use rustc_middle::ty::{self, Instance, Ty}; use rustc_middle::{bug, span_bug}; use rustc_span::symbol::sym; use rustc_span::Span; -use rustc_target::abi::{Align, HasDataLayout, LayoutOf, Primitive, Scalar, Size}; +use rustc_target::abi::{AddressSpace, Align, HasDataLayout, LayoutOf, Primitive, Scalar, Size}; use std::ffi::CStr; @@ -53,10 +53,16 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll ) .expect("const_alloc_to_llvm: could not read relocation pointer") as u64; + + let address_space = match cx.tcx.global_alloc(alloc_id) { + GlobalAlloc::Function(..) 
=> cx.data_layout().instruction_address_space, + GlobalAlloc::Static(..) | GlobalAlloc::Memory(..) => AddressSpace::DATA, + }; + llvals.push(cx.scalar_to_backend( Pointer::new(alloc_id, Size::from_bytes(ptr_offset)).into(), &Scalar { value: Primitive::Pointer, valid_range: 0..=!0 }, - cx.type_i8p(), + cx.type_i8p_ext(address_space), )); next_offset = offset + pointer_size; } diff --git a/src/librustc_codegen_llvm/type_.rs b/src/librustc_codegen_llvm/type_.rs index 854eff317338..05e364884f66 100644 --- a/src/librustc_codegen_llvm/type_.rs +++ b/src/librustc_codegen_llvm/type_.rs @@ -15,7 +15,7 @@ use rustc_middle::bug; use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::ty::Ty; use rustc_target::abi::call::{CastTarget, FnAbi, Reg}; -use rustc_target::abi::{Align, Integer, Size}; +use rustc_target::abi::{AddressSpace, Align, Integer, Size}; use std::fmt; use std::ptr; @@ -198,9 +198,13 @@ impl BaseTypeMethods<'tcx> for CodegenCx<'ll, 'tcx> { assert_ne!( self.type_kind(ty), TypeKind::Function, - "don't call ptr_to on function types, use ptr_to_llvm_type on FnAbi instead" + "don't call ptr_to on function types, use ptr_to_llvm_type on FnAbi instead or explicitly specify an address space if it makes sense" ); - ty.ptr_to() + ty.ptr_to(AddressSpace::DATA) + } + + fn type_ptr_to_ext(&self, ty: &'ll Type, address_space: AddressSpace) -> &'ll Type { + ty.ptr_to(address_space) } fn element_type(&self, ty: &'ll Type) -> &'ll Type { @@ -241,11 +245,11 @@ impl Type { } pub fn i8p_llcx(llcx: &llvm::Context) -> &Type { - Type::i8_llcx(llcx).ptr_to() + Type::i8_llcx(llcx).ptr_to(AddressSpace::DATA) } - fn ptr_to(&self) -> &Type { - unsafe { llvm::LLVMPointerType(&self, 0) } + fn ptr_to(&self, address_space: AddressSpace) -> &Type { + unsafe { llvm::LLVMPointerType(&self, address_space.0) } } } diff --git a/src/librustc_codegen_ssa/meth.rs b/src/librustc_codegen_ssa/meth.rs index cfa01280e5a9..bcc19c6a44bd 100644 --- a/src/librustc_codegen_ssa/meth.rs +++ 
b/src/librustc_codegen_ssa/meth.rs @@ -75,7 +75,7 @@ pub fn get_vtable<'tcx, Cx: CodegenMethods<'tcx>>( } // Not in the cache; build it. - let nullptr = cx.const_null(cx.type_i8p()); + let nullptr = cx.const_null(cx.type_i8p_ext(cx.data_layout().instruction_address_space)); let methods_root; let methods = if let Some(trait_ref) = trait_ref { diff --git a/src/librustc_codegen_ssa/mir/mod.rs b/src/librustc_codegen_ssa/mir/mod.rs index 00b4bf96afa5..f4d83e877108 100644 --- a/src/librustc_codegen_ssa/mir/mod.rs +++ b/src/librustc_codegen_ssa/mir/mod.rs @@ -6,6 +6,7 @@ use rustc_middle::mir::interpret::ErrorHandled; use rustc_middle::ty::layout::{FnAbiExt, HasTyCtxt, TyAndLayout}; use rustc_middle::ty::{self, Instance, Ty, TypeFoldable}; use rustc_target::abi::call::{FnAbi, PassMode}; +use rustc_target::abi::HasDataLayout; use std::iter; @@ -323,7 +324,9 @@ fn create_funclets<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // C++ personality function, but `catch (...)` has no type so // it's null. The 64 here is actually a bitfield which // represents that this is a catch-all block. - let null = bx.const_null(bx.type_i8p()); + let null = bx.const_null( + bx.type_i8p_ext(bx.cx().data_layout().instruction_address_space), + ); let sixty_four = bx.const_i32(64); funclet = cp_bx.catch_pad(cs, &[null, sixty_four, null]); cp_bx.br(llbb); diff --git a/src/librustc_codegen_ssa/traits/type_.rs b/src/librustc_codegen_ssa/traits/type_.rs index c55bf9858b97..726d948cfd40 100644 --- a/src/librustc_codegen_ssa/traits/type_.rs +++ b/src/librustc_codegen_ssa/traits/type_.rs @@ -7,7 +7,7 @@ use rustc_middle::ty::layout::TyAndLayout; use rustc_middle::ty::{self, Ty}; use rustc_span::DUMMY_SP; use rustc_target::abi::call::{ArgAbi, CastTarget, FnAbi, Reg}; -use rustc_target::abi::Integer; +use rustc_target::abi::{AddressSpace, Integer}; // This depends on `Backend` and not `BackendTypes`, because consumers will probably want to use // `LayoutOf` or `HasTyCtxt`. 
This way, they don't have to add a constraint on it themselves. @@ -27,6 +27,7 @@ pub trait BaseTypeMethods<'tcx>: Backend<'tcx> { fn type_struct(&self, els: &[Self::Type], packed: bool) -> Self::Type; fn type_kind(&self, ty: Self::Type) -> TypeKind; fn type_ptr_to(&self, ty: Self::Type) -> Self::Type; + fn type_ptr_to_ext(&self, ty: Self::Type, address_space: AddressSpace) -> Self::Type; fn element_type(&self, ty: Self::Type) -> Self::Type; /// Returns the number of elements in `self` if it is a LLVM vector type. @@ -42,7 +43,11 @@ pub trait BaseTypeMethods<'tcx>: Backend<'tcx> { pub trait DerivedTypeMethods<'tcx>: BaseTypeMethods<'tcx> + MiscMethods<'tcx> { fn type_i8p(&self) -> Self::Type { - self.type_ptr_to(self.type_i8()) + self.type_i8p_ext(AddressSpace::DATA) + } + + fn type_i8p_ext(&self, address_space: AddressSpace) -> Self::Type { + self.type_ptr_to_ext(self.type_i8(), address_space) } fn type_int(&self) -> Self::Type { diff --git a/src/librustc_target/abi/mod.rs b/src/librustc_target/abi/mod.rs index c79e9bb28900..a570bd914ae6 100644 --- a/src/librustc_target/abi/mod.rs +++ b/src/librustc_target/abi/mod.rs @@ -32,7 +32,7 @@ pub struct TargetDataLayout { /// Alignments for vector types. pub vector_align: Vec<(Size, AbiAndPrefAlign)>, - pub instruction_address_space: u32, + pub instruction_address_space: AddressSpace, } impl Default for TargetDataLayout { @@ -56,7 +56,7 @@ impl Default for TargetDataLayout { (Size::from_bits(64), AbiAndPrefAlign::new(align(64))), (Size::from_bits(128), AbiAndPrefAlign::new(align(128))), ], - instruction_address_space: 0, + instruction_address_space: AddressSpace::DATA, } } } @@ -65,7 +65,7 @@ impl TargetDataLayout { pub fn parse(target: &Target) -> Result { // Parse an address space index from a string. 
let parse_address_space = |s: &str, cause: &str| { - s.parse::().map_err(|err| { + s.parse::().map(AddressSpace).map_err(|err| { format!("invalid address space `{}` for `{}` in \"data-layout\": {}", s, cause, err) }) }; @@ -744,6 +744,17 @@ impl FieldsShape { } } +/// An identifier that specifies the address space that some operation +/// should operate on. Special address spaces have an effect on code generation, +/// depending on the target and the address spaces it implements. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct AddressSpace(pub u32); + +impl AddressSpace { + /// The default address space, corresponding to data space. + pub const DATA: Self = AddressSpace(0); +} + /// Describes how values of the type are passed by target ABIs, /// in terms of categories of C types there are ABI rules for. #[derive(Clone, PartialEq, Eq, Hash, Debug, HashStable_Generic)] diff --git a/src/test/codegen/avr/avr-func-addrspace.rs b/src/test/codegen/avr/avr-func-addrspace.rs new file mode 100644 index 000000000000..7759d9603a5a --- /dev/null +++ b/src/test/codegen/avr/avr-func-addrspace.rs @@ -0,0 +1,93 @@ +// compile-flags: -O --target=avr-unknown-unknown --crate-type=rlib + +// This test validates that function pointers can be stored in global variables +// and called upon. It ensures that Rust emits function pointers in the correct +// address space to LLVM so that an assertion error relating to casting is +// not triggered. +// +// It also validates that functions can be called through function pointers +// through traits. 
+ +#![feature(no_core, lang_items, unboxed_closures, arbitrary_self_types)] +#![crate_type = "lib"] +#![no_core] + +#[lang = "sized"] +pub trait Sized { } +#[lang = "copy"] +pub trait Copy { } +#[lang = "receiver"] +pub trait Receiver { } + +pub struct Result { _a: T, _b: E } + +impl Copy for usize {} + +#[lang = "drop_in_place"] +pub unsafe fn drop_in_place(_: *mut T) {} + +#[lang = "fn_once"] +pub trait FnOnce { + #[lang = "fn_once_output"] + type Output; + + extern "rust-call" fn call_once(self, args: Args) -> Self::Output; +} + +#[lang = "fn_mut"] +pub trait FnMut : FnOnce { + extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output; +} + +#[lang = "fn"] +pub trait Fn: FnOnce { + /// Performs the call operation. + extern "rust-call" fn call(&self, args: Args) -> Self::Output; +} + +impl<'a, A, R> FnOnce for &'a fn(A) -> R { + type Output = R; + + extern "rust-call" fn call_once(self, args: A) -> R { + (*self)(args) + } +} + +pub static mut STORAGE_FOO: fn(&usize, &mut u32) -> Result<(), ()> = arbitrary_black_box; +pub static mut STORAGE_BAR: u32 = 12; + +fn arbitrary_black_box(ptr: &usize, _: &mut u32) -> Result<(), ()> { + let raw_ptr = ptr as *const usize; + let _v: usize = unsafe { *raw_ptr }; + loop {} +} + +#[inline(never)] +#[no_mangle] +fn call_through_fn_trait(a: &mut impl Fn<(), Output=()>) { + (*a)() +} + +#[inline(never)] +fn update_bar_value() { + unsafe { + STORAGE_BAR = 88; + } +} + +// CHECK: define void @test(){{.+}}addrspace(1) +#[no_mangle] +pub extern "C" fn test() { + let mut buf = 7; + + // A call through the Fn trait must use address space 1. + // + // CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait() + call_through_fn_trait(&mut update_bar_value); + + // A call through a global variable must use address space 1. 
+ // CHECK: load {{.*}}addrspace(1){{.+}}FOO + unsafe { + STORAGE_FOO(&1, &mut buf); + } +} From 5581ce6c10ae0b4e6503db0081e2defd7ef829ff Mon Sep 17 00:00:00 2001 From: Dylan McKay Date: Fri, 19 Jun 2020 19:04:30 +1200 Subject: [PATCH 2/2] [AVR] Ensure that function pointers stored within aggregates are annotated with the correct space Before this patch, a function pointer stored within an aggregate like a struct or an enum would always have the default address space `0`. This patch removes this assumption and instead, introspects the inner type being pointed at, storing the target address space in the PointeeInfo struct so that downstream users may query it. --- src/librustc_codegen_llvm/type_of.rs | 15 +++++++------ src/librustc_middle/ty/layout.rs | 33 ++++++++++++++++++++++++---- src/librustc_target/abi/mod.rs | 5 +++-- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/librustc_codegen_llvm/type_of.rs b/src/librustc_codegen_llvm/type_of.rs index 5a0da6be5980..1d0adc5783f3 100644 --- a/src/librustc_codegen_llvm/type_of.rs +++ b/src/librustc_codegen_llvm/type_of.rs @@ -7,7 +7,7 @@ use rustc_middle::bug; use rustc_middle::ty::layout::{FnAbiExt, TyAndLayout}; use rustc_middle::ty::print::obsolete::DefPathBasedNames; use rustc_middle::ty::{self, Ty, TypeFoldable}; -use rustc_target::abi::{Abi, Align, FieldsShape}; +use rustc_target::abi::{Abi, AddressSpace, Align, FieldsShape}; use rustc_target::abi::{Int, Pointer, F32, F64}; use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAndLayoutMethods, Variants}; @@ -310,12 +310,13 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> { F64 => cx.type_f64(), Pointer => { // If we know the alignment, pick something better than i8. 
- let pointee = if let Some(pointee) = self.pointee_info_at(cx, offset) { - cx.type_pointee_for_align(pointee.align) - } else { - cx.type_i8() - }; - cx.type_ptr_to(pointee) + let (pointee, address_space) = + if let Some(pointee) = self.pointee_info_at(cx, offset) { + (cx.type_pointee_for_align(pointee.align), pointee.address_space) + } else { + (cx.type_i8(), AddressSpace::DATA) + }; + cx.type_ptr_to_ext(pointee, address_space) } } } diff --git a/src/librustc_middle/ty/layout.rs b/src/librustc_middle/ty/layout.rs index cb937bf0112a..dc775b15927f 100644 --- a/src/librustc_middle/ty/layout.rs +++ b/src/librustc_middle/ty/layout.rs @@ -2166,16 +2166,31 @@ where } fn pointee_info_at(this: TyAndLayout<'tcx>, cx: &C, offset: Size) -> Option { - match this.ty.kind { + let addr_space_of_ty = |ty: Ty<'tcx>| { + if ty.is_fn() { cx.data_layout().instruction_address_space } else { AddressSpace::DATA } + }; + + let pointee_info = match this.ty.kind { ty::RawPtr(mt) if offset.bytes() == 0 => { cx.layout_of(mt.ty).to_result().ok().map(|layout| PointeeInfo { size: layout.size, align: layout.align.abi, safe: None, + address_space: addr_space_of_ty(mt.ty), + }) + } + ty::FnPtr(fn_sig) if offset.bytes() == 0 => { + cx.layout_of(cx.tcx().mk_fn_ptr(fn_sig)).to_result().ok().map(|layout| { + PointeeInfo { + size: layout.size, + align: layout.align.abi, + safe: None, + address_space: cx.data_layout().instruction_address_space, + } }) } - ty::Ref(_, ty, mt) if offset.bytes() == 0 => { + let address_space = addr_space_of_ty(ty); let tcx = cx.tcx(); let is_freeze = ty.is_freeze(tcx.at(DUMMY_SP), cx.param_env()); let kind = match mt { @@ -2210,6 +2225,7 @@ where size: layout.size, align: layout.align.abi, safe: Some(kind), + address_space, }) } @@ -2254,7 +2270,9 @@ where result = field.to_result().ok().and_then(|field| { if ptr_end <= field_start + field.size { // We found the right field, look inside it. 
- field.pointee_info_at(cx, offset - field_start) + let field_info = + field.pointee_info_at(cx, offset - field_start); + field_info } else { None } @@ -2277,7 +2295,14 @@ where result } - } + }; + + debug!( + "pointee_info_at (offset={:?}, type kind: {:?}) => {:?}", + offset, this.ty.kind, pointee_info + ); + + pointee_info } } diff --git a/src/librustc_target/abi/mod.rs b/src/librustc_target/abi/mod.rs index a570bd914ae6..b3e5f5c0c74b 100644 --- a/src/librustc_target/abi/mod.rs +++ b/src/librustc_target/abi/mod.rs @@ -1024,7 +1024,7 @@ impl MaybeResult for Result { } } -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum PointerKind { /// Most general case, we know no restrictions to tell LLVM. Shared, @@ -1039,11 +1039,12 @@ pub enum PointerKind { UniqueOwned, } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub struct PointeeInfo { pub size: Size, pub align: Align, pub safe: Option, + pub address_space: AddressSpace, } pub trait TyAndLayoutMethods<'a, C: LayoutOf>: Sized {