From f604467cac6b4f44c0ff5c6c28ee7b1e624abeef Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 2 Jul 2021 10:59:49 +0200 Subject: [PATCH] Abort in panic_abort eh_personality This ensures that it is impossible to unwind through rust code in case of -Cpanic=abort --- library/panic_abort/src/emcc.rs | 54 +++++++++ library/panic_abort/src/gcc.rs | 192 ++++++++++++++++++++++++++++++++ library/panic_abort/src/lib.rs | 105 +++++++---------- 3 files changed, 284 insertions(+), 67 deletions(-) create mode 100644 library/panic_abort/src/emcc.rs create mode 100644 library/panic_abort/src/gcc.rs diff --git a/library/panic_abort/src/emcc.rs b/library/panic_abort/src/emcc.rs new file mode 100644 index 0000000000000..e56247c313c2d --- /dev/null +++ b/library/panic_abort/src/emcc.rs @@ -0,0 +1,54 @@ +//! Unwinding for *emscripten* target. +//! +//! Whereas Rust's usual unwinding implementation for Unix platforms +//! calls into the libunwind APIs directly, on Emscripten we instead +//! call into the C++ unwinding APIs. This is just an expedience since +//! Emscripten's runtime always implements those APIs and does not +//! implement libunwind. + +use libc::c_int; +use unwind as uw; + +// This matches the layout of std::type_info in C++ +#[repr(C)] +struct TypeInfo { + vtable: *const usize, + name: *const u8, +} +unsafe impl Sync for TypeInfo {} + +extern "C" { + // The leading `\x01` byte here is actually a magical signal to LLVM to + // *not* apply any other mangling like prefixing with a `_` character. + // + // This symbol is the vtable used by C++'s `std::type_info`. Objects of type + // `std::type_info`, type descriptors, have a pointer to this table. Type + // descriptors are referenced by the C++ EH structures defined above and + // that we construct below. + // + // Note that the real size is larger than 3 usize, but we only need our + // vtable to point to the third element. 
+ #[link_name = "\x01_ZTVN10__cxxabiv117__class_type_infoE"] + static CLASS_TYPE_INFO_VTABLE: [usize; 3]; +} + +// std::type_info for a rust_panic class +#[lang = "eh_catch_typeinfo"] +static EXCEPTION_TYPE_INFO: TypeInfo = TypeInfo { + // Normally we would use .as_ptr().add(2) but this doesn't work in a const context. + vtable: unsafe { &CLASS_TYPE_INFO_VTABLE[2] }, + // This intentionally doesn't use the normal name mangling scheme because + // we don't want C++ to be able to produce or catch Rust panics. + name: b"rust_panic\0".as_ptr(), +}; + +#[rustc_std_internal_symbol] +unsafe extern "C" fn rust_eh_personality( + _version: c_int, + _actions: uw::_Unwind_Action, + _exception_class: uw::_Unwind_Exception_Class, + _exception_object: *mut uw::_Unwind_Exception, + _context: *mut uw::_Unwind_Context, +) -> uw::_Unwind_Reason_Code { + crate::do_abort(); +} diff --git a/library/panic_abort/src/gcc.rs b/library/panic_abort/src/gcc.rs new file mode 100644 index 0000000000000..a9ecc8fe45088 --- /dev/null +++ b/library/panic_abort/src/gcc.rs @@ -0,0 +1,192 @@ +//! Implementation of panics backed by libgcc/libunwind (in some form). +//! +//! For background on exception handling and stack unwinding please see +//! "Exception Handling in LLVM" (llvm.org/docs/ExceptionHandling.html) and +//! documents linked from it. +//! These are also good reads: +//! * <https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html> +//! * <http://monoinfinito.wordpress.com/series/exception-handling-in-c/> +//! * <http://www.airs.com/blog/index.php?s=exception+frames> +//! +//! ## A brief summary +//! +//! Exception handling happens in two phases: a search phase and a cleanup +//! phase. +//! +//! In both phases the unwinder walks stack frames from top to bottom using +//! information from the stack frame unwind sections of the current process's +//! modules ("module" here refers to an OS module, i.e., an executable or a +//! dynamic library). +//! +//! For each stack frame, it invokes the associated "personality routine", whose +//! address is also stored in the unwind info section. +//! +//! In the search phase, the job of a personality routine is to examine the +//! 
exception object being thrown, and to decide whether it should be caught at +//! that stack frame. Once the handler frame has been identified, the cleanup phase +//! begins. +//! +//! In the cleanup phase, the unwinder invokes each personality routine again. +//! This time it decides which (if any) cleanup code needs to be run for +//! the current stack frame. If so, control is transferred to a special +//! branch in the function body, the "landing pad", which invokes destructors, +//! frees memory, etc. At the end of the landing pad, control is transferred +//! back to the unwinder and unwinding resumes. +//! +//! Once the stack has been unwound down to the handler frame level, unwinding stops +//! and the last personality routine transfers control to the catch block. + +#![allow(nonstandard_style)] + +use libc::c_int; + +// The following code is based on GCC's C and C++ personality routines. For reference, see: +// https://github.com/gcc-mirror/gcc/blob/master/libstdc++-v3/libsupc++/eh_personality.cc +// https://github.com/gcc-mirror/gcc/blob/trunk/libgcc/unwind-c.c + +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "arm", not(target_os = "ios"), not(target_os = "netbsd")))] { + // ARM EHABI personality routine. + // https://infocenter.arm.com/help/topic/com.arm.doc.ihi0038b/IHI0038B_ehabi.pdf + // + // iOS uses the default routine instead since it uses SjLj unwinding. + #[rustc_std_internal_symbol] + unsafe extern "C" fn rust_eh_personality(_state: _Unwind_State, + _exception_object: *mut _Unwind_Exception, + _context: *mut _Unwind_Context) + -> _Unwind_Reason_Code { + crate::do_abort() + } + } else { + cfg_if::cfg_if! { + if #[cfg(all(windows, target_arch = "x86_64", target_env = "gnu"))] { + // On x86_64 MinGW targets, the unwinding mechanism is SEH; however, the unwind + // handler data (aka LSDA) uses GCC-compatible encoding. 
+ #[rustc_std_internal_symbol] + #[allow(nonstandard_style)] + unsafe extern "C" fn rust_eh_personality(_exceptionRecord: *mut EXCEPTION_RECORD, + _establisherFrame: LPVOID, + _contextRecord: *mut CONTEXT, + _dispatcherContext: *mut DISPATCHER_CONTEXT) + -> EXCEPTION_DISPOSITION { + crate::do_abort(); + } + } else { + // The personality routine for most of our targets. + #[rustc_std_internal_symbol] + unsafe extern "C" fn rust_eh_personality(_version: c_int, + _actions: _Unwind_Action, + _exception_class: _Unwind_Exception_Class, + _exception_object: *mut _Unwind_Exception, + _context: *mut _Unwind_Context) + -> _Unwind_Reason_Code { + crate::do_abort(); + } + } + } + } +} + +// Frame unwind info registration +// +// Each module's image contains a frame unwind info section (usually +// ".eh_frame"). When a module is loaded/unloaded into the process, the +// unwinder must be informed about the location of this section in memory. The +// methods of achieving that vary by the platform. On some (e.g., Linux), the +// unwinder can discover unwind info sections on its own (by dynamically +// enumerating currently loaded modules via the dl_iterate_phdr() API and +// finding their ".eh_frame" sections); Others, like Windows, require modules +// to actively register their unwind info sections via unwinder API. +// +// This module defines two symbols which are referenced and called from +// rsbegin.rs to register our information with the GCC runtime. The +// implementation of stack unwinding is (for now) deferred to libgcc_eh, however +// Rust crates use these Rust-specific entry points to avoid potential clashes +// with any GCC runtime. 
+#[cfg(all(target_os = "windows", target_arch = "x86", target_env = "gnu"))] +mod eh_frame_registry { + extern "C" { + fn __register_frame_info(eh_frame_begin: *const u8, object: *mut u8); + fn __deregister_frame_info(eh_frame_begin: *const u8, object: *mut u8); + } + + #[rustc_std_internal_symbol] + unsafe extern "C" fn rust_eh_register_frames(eh_frame_begin: *const u8, object: *mut u8) { + __register_frame_info(eh_frame_begin, object); + } + + #[rustc_std_internal_symbol] + unsafe extern "C" fn rust_eh_unregister_frames(eh_frame_begin: *const u8, object: *mut u8) { + __deregister_frame_info(eh_frame_begin, object); + } +} + +use libc::{c_void, uintptr_t}; + +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq)] +enum _Unwind_Reason_Code { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8, + _URC_FAILURE = 9, // used only by ARM EHABI +} + +type _Unwind_Exception_Class = u64; +type _Unwind_Word = uintptr_t; +type _Unwind_Ptr = uintptr_t; +type _Unwind_Trace_Fn = + extern "C" fn(ctx: *mut _Unwind_Context, arg: *mut c_void) -> _Unwind_Reason_Code; + +enum _Unwind_Exception {} + +enum _Unwind_Context {} + +type _Unwind_Exception_Cleanup_Fn = + extern "C" fn(unwind_code: _Unwind_Reason_Code, exception: *mut _Unwind_Exception); + +cfg_if::cfg_if! 
{ +if #[cfg(any(target_os = "ios", target_os = "netbsd", not(target_arch = "arm")))] { + // Not ARM EHABI + #[repr(C)] + #[derive(Copy, Clone, PartialEq)] + enum _Unwind_Action { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16, + } + +} else { + // ARM EHABI + #[repr(C)] + #[derive(Copy, Clone, PartialEq)] + enum _Unwind_State { + _US_VIRTUAL_UNWIND_FRAME = 0, + _US_UNWIND_FRAME_STARTING = 1, + _US_UNWIND_FRAME_RESUME = 2, + _US_ACTION_MASK = 3, + _US_FORCE_UNWIND = 8, + _US_END_OF_STACK = 16, + } +} +} // cfg_if! + +cfg_if::cfg_if! { +if #[cfg(all(windows, target_arch = "x86_64", target_env = "gnu"))] { + // We declare these as opaque types. This is fine since you just need to + // pass them to _GCC_specific_handler and forget about them. + enum EXCEPTION_RECORD {} + type LPVOID = *mut c_void; + enum CONTEXT {} + enum DISPATCHER_CONTEXT {} + type EXCEPTION_DISPOSITION = c_int; +} +} // cfg_if! diff --git a/library/panic_abort/src/lib.rs b/library/panic_abort/src/lib.rs index d95ea6530c204..50d0358fe65f4 100644 --- a/library/panic_abort/src/lib.rs +++ b/library/panic_abort/src/lib.rs @@ -28,14 +28,21 @@ pub unsafe extern "C" fn __rust_panic_cleanup(_: *mut u8) -> *mut (dyn Any + Sen unreachable!() } -// "Leak" the payload and shim to the relevant abort on the platform in question. +/// "Leak" the payload and abort. #[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_start_panic(_payload: *mut &mut dyn BoxMeUp) -> u32 { // Android has the ability to attach a message as part of the abort. #[cfg(target_os = "android")] android::android_set_abort_message(_payload); - abort(); + do_abort(); +} + +/// Shim to the relevant abort on the platform in question. +fn do_abort() -> ! { + unsafe { + abort(); + } cfg_if::cfg_if! { if #[cfg(unix)] { @@ -86,70 +93,34 @@ pub unsafe extern "C" fn __rust_start_panic(_payload: *mut &mut dyn BoxMeUp) -> } } -// This... is a bit of an oddity. 
The tl;dr; is that this is required to link -// correctly, the longer explanation is below. -// -// Right now the binaries of libcore/libstd that we ship are all compiled with -// `-C panic=unwind`. This is done to ensure that the binaries are maximally -// compatible with as many situations as possible. The compiler, however, -// requires a "personality function" for all functions compiled with `-C -// panic=unwind`. This personality function is hardcoded to the symbol -// `rust_eh_personality` and is defined by the `eh_personality` lang item. -// -// So... why not just define that lang item here? Good question! The way that -// panic runtimes are linked in is actually a little subtle in that they're -// "sort of" in the compiler's crate store, but only actually linked if another -// isn't actually linked. This ends up meaning that both this crate and the -// panic_unwind crate can appear in the compiler's crate store, and if both -// define the `eh_personality` lang item then that'll hit an error. -// -// To handle this the compiler only requires the `eh_personality` is defined if -// the panic runtime being linked in is the unwinding runtime, and otherwise -// it's not required to be defined (rightfully so). In this case, however, this -// library just defines this symbol so there's at least some personality -// somewhere. -// -// Essentially this symbol is just defined to get wired up to libcore/libstd -// binaries, but it should never be called as we don't link in an unwinding -// runtime at all. -pub mod personalities { - #[rustc_std_internal_symbol] - #[cfg(not(any( - all(target_arch = "wasm32", not(target_os = "emscripten"),), - all(target_os = "windows", target_env = "gnu", target_arch = "x86_64",), - )))] - pub extern "C" fn rust_eh_personality() {} - - // On x86_64-pc-windows-gnu we use our own personality function that needs - // to return `ExceptionContinueSearch` as we're passing on all our frames. 
- #[rustc_std_internal_symbol] - #[cfg(all(target_os = "windows", target_env = "gnu", target_arch = "x86_64"))] - pub extern "C" fn rust_eh_personality( - _record: usize, - _frame: usize, - _context: usize, - _dispatcher: usize, - ) -> u32 { - 1 // `ExceptionContinueSearch` +cfg_if::cfg_if! { + if #[cfg(target_os = "emscripten")] { + #[path = "emcc.rs"] + mod imp; + } else if #[cfg(target_env = "msvc")] { + // This is required by the compiler to exist (e.g., it's a lang item), but + // it's never actually called by the compiler because __C_specific_handler + // or _except_handler3 is the personality function that is always used. + // Hence this is just an aborting stub. + #[rustc_std_internal_symbol] + fn rust_eh_personality() { + core::intrinsics::abort() + } + } else if #[cfg(any( + all(target_family = "windows", target_env = "gnu"), + target_os = "psp", + target_family = "unix", + all(target_vendor = "fortanix", target_env = "sgx"), + ))] { + #[path = "gcc.rs"] + mod imp; + } else { + // Targets that don't support unwinding. + // - arch=wasm32 + // - os=none ("bare metal" targets) + // - os=uefi + // - os=hermit + // - nvptx64-nvidia-cuda + // - arch=avr } - - // Similar to above, this corresponds to the `eh_catch_typeinfo` lang item - // that's only used on Emscripten currently. - // - // Since panics don't generate exceptions and foreign exceptions are - // currently UB with -C panic=abort (although this may be subject to - // change), any catch_unwind calls will never use this typeinfo. - #[rustc_std_internal_symbol] - #[allow(non_upper_case_globals)] - #[cfg(target_os = "emscripten")] - static rust_eh_catch_typeinfo: [usize; 2] = [0; 2]; - - // These two are called by our startup objects on i686-pc-windows-gnu, but - // they don't need to do anything so the bodies are nops. 
- #[rustc_std_internal_symbol] - #[cfg(all(target_os = "windows", target_env = "gnu", target_arch = "x86"))] - pub extern "C" fn rust_eh_register_frames() {} - #[rustc_std_internal_symbol] - #[cfg(all(target_os = "windows", target_env = "gnu", target_arch = "x86"))] - pub extern "C" fn rust_eh_unregister_frames() {} }