Skip to content

Commit

Permalink
miri: treat non-memory local variables properly for data race detection
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed Aug 31, 2024
1 parent fa72f07 commit 2894ff7
Show file tree
Hide file tree
Showing 15 changed files with 411 additions and 13 deletions.
21 changes: 20 additions & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,10 +549,29 @@ pub trait Machine<'tcx>: Sized {
Ok(ReturnAction::Normal)
}

/// Called immediately after an "immediate" local variable is read
/// (i.e., this is called for reads that do not end up accessing addressable memory).
#[inline(always)]
fn after_local_read(_ecx: &InterpCx<'tcx, Self>, _local: mir::Local) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after an "immediate" local variable is assigned a new value
/// (i.e., this is called for writes that do not end up in memory).
/// `storage_live` indicates whether this is the initial write upon `StorageLive`.
#[inline(always)]
fn after_local_write(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_storage_live: bool,
) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after actual memory was allocated for a local
/// but before the local's stack frame is updated to point to that memory.
#[inline(always)]
fn after_local_allocated(
fn after_local_moved_to_memory(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_mplace: &MPlaceTy<'tcx, Self::Provenance>,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_const_eval/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
if matches!(op, Operand::Immediate(_)) {
assert!(!layout.is_unsized());
}
M::after_local_read(self, local)?;
Ok(OpTy { op, layout })
}

Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_const_eval/src/interpret/place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,15 +506,13 @@ where
&self,
local: mir::Local,
) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> {
// Other parts of the system rely on `Place::Local` never being unsized.
// So we eagerly check here if this local has an MPlace, and if yes we use it.
let frame = self.frame();
let layout = self.layout_of_local(frame, local, None)?;
let place = if layout.is_sized() {
// We can just always use the `Local` for sized values.
Place::Local { local, offset: None, locals_addr: frame.locals_addr() }
} else {
// Unsized `Local` isn't okay (we cannot store the metadata).
// Other parts of the system rely on `Place::Local` never being unsized.
match frame.locals[local].access()? {
Operand::Immediate(_) => bug!(),
Operand::Indirect(mplace) => Place::Ptr(*mplace),
Expand Down Expand Up @@ -626,6 +624,8 @@ where
Operand::Immediate(local_val) => {
// Local can be updated in-place.
*local_val = src;
// Call the machine hook (the data race detector needs to know about this write).
M::after_local_write(self, local, /*storage_live*/ false)?;
// Double-check that the value we are storing and the local fit to each other.
// (*After* doing the update for borrow checker reasons.)
if cfg!(debug_assertions) {
Expand Down Expand Up @@ -944,7 +944,7 @@ where
mplace.mplace,
)?;
}
M::after_local_allocated(self, local, &mplace)?;
M::after_local_moved_to_memory(self, local, &mplace)?;
// Now we can call `access_mut` again, asserting it goes well, and actually
// overwrite things. This points to the entire allocation, not just the part
// the place refers to, i.e. we do this before we apply `offset`.
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_const_eval/src/interpret/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let dest_place = self.allocate_dyn(layout, MemoryKind::Stack, meta)?;
Operand::Indirect(*dest_place.mplace())
} else {
assert!(!meta.has_meta()); // we're dropping the metadata
// Just make this an efficient immediate.
assert!(!meta.has_meta()); // we're dropping the metadata
// Make sure the machine knows this "write" is happening. (This is important so that
// races involving local variable allocation can be detected by Miri.)
M::after_local_write(self, local, /*storage_live*/ true)?;
// Note that not calling `layout_of` here does have one real consequence:
// if the type is too big, we'll only notice this when the local is actually initialized,
// which is a bit too late -- we should ideally notice this already here, when the memory
Expand Down
98 changes: 98 additions & 0 deletions src/tools/miri/src/concurrency/data_race.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use std::{
};

use rustc_ast::Mutability;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxHashSet;
use rustc_index::{Idx, IndexVec};
use rustc_middle::{mir, ty::Ty};
Expand Down Expand Up @@ -1121,6 +1122,103 @@ impl VClockAlloc {
}
}

/// Vector clock state for a stack frame (tracking the local variables
/// that do not have an allocation yet).
#[derive(Debug, Default)]
pub struct FrameState {
local_clocks: RefCell<FxHashMap<mir::Local, LocalClocks>>,
}

/// Stripped-down version of [`MemoryCellClocks`] for the clocks we need to keep track
/// of in a local that does not yet have addressable memory -- and hence can only
/// be accessed from the thread its stack frame belongs to, and cannot be access atomically.
#[derive(Debug)]
struct LocalClocks {
write: VTimestamp,
write_type: NaWriteType,
read: VTimestamp,
}

impl Default for LocalClocks {
fn default() -> Self {
Self { write: VTimestamp::ZERO, write_type: NaWriteType::Allocate, read: VTimestamp::ZERO }
}
}

impl FrameState {
pub fn local_write(&self, local: mir::Local, storage_live: bool, machine: &MiriMachine<'_>) {
let current_span = machine.current_span();
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
// This should do the same things as `MemoryCellClocks::write_race_detect`.
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
let mut clocks = self.local_clocks.borrow_mut();
if storage_live {
let new_clocks = LocalClocks {
write: thread_clocks.clock[index],
write_type: NaWriteType::Allocate,
read: VTimestamp::ZERO,
};
// There might already be an entry in the map for this, if the local was previously
// live already.
clocks.insert(local, new_clocks);
} else {
// This can fail to exist if `race_detecting` was false when the allocation
// occurred, in which case we can backdate this to the beginning of time.
let clocks = clocks.entry(local).or_insert_with(Default::default);
clocks.write = thread_clocks.clock[index];
clocks.write_type = NaWriteType::Write;
}
}
}

pub fn local_read(&self, local: mir::Local, machine: &MiriMachine<'_>) {
let current_span = machine.current_span();
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
// This should do the same things as `MemoryCellClocks::read_race_detect`.
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
thread_clocks.clock.index_mut(index).set_read_type(NaReadType::Read);
// This can fail to exist if `race_detecting` was false when the allocation
// occurred, in which case we can backdate this to the beginning of time.
let mut clocks = self.local_clocks.borrow_mut();
let clocks = clocks.entry(local).or_insert_with(Default::default);
clocks.read = thread_clocks.clock[index];
}
}

pub fn local_moved_to_memory(
&self,
local: mir::Local,
alloc: &mut VClockAlloc,
machine: &MiriMachine<'_>,
) {
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
let (index, _thread_clocks) = global.active_thread_state_mut(&machine.threads);
// Get the time the last write actually happened. This can fail to exist if
// `race_detecting` was false when the write occurred, in that case we can backdate this
// to the beginning of time.
let local_clocks = self.local_clocks.borrow_mut().remove(&local).unwrap_or_default();
for (_mem_clocks_range, mem_clocks) in alloc.alloc_ranges.get_mut().iter_mut_all() {
// The initialization write for this already happened, just at the wrong timestamp.
// Check that the thread index matches what we expect.
assert_eq!(mem_clocks.write.0, index);
// Convert the local's clocks into memory clocks.
mem_clocks.write = (index, local_clocks.write);
mem_clocks.write_type = local_clocks.write_type;
mem_clocks.read = VClock::new_with_index(index, local_clocks.read);
}
}
}
}

impl<'tcx> EvalContextPrivExt<'tcx> for MiriInterpCx<'tcx> {}
trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
/// Temporarily allow data-races to occur. This should only be used in
Expand Down
4 changes: 3 additions & 1 deletion src/tools/miri/src/concurrency/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,9 @@ impl<'tcx> ThreadManager<'tcx> {
}

/// Mutably borrow the stack of the active thread.
fn active_thread_stack_mut(&mut self) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
pub fn active_thread_stack_mut(
&mut self,
) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
&mut self.threads[self.active_thread].stack
}
pub fn all_stacks(
Expand Down
6 changes: 6 additions & 0 deletions src/tools/miri/src/concurrency/vector_clock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,19 @@ impl Ord for VTimestamp {
/// also this means that there is only one unique valid length
/// for each set of vector clock values and hence the PartialEq
/// and Eq derivations are correct.
///
/// This means we cannot represent a clock where the last entry is a timestamp-0 read that occurs
/// because of a retag. That's fine, all it does is risk wrong diagnostics in a extreme corner case.
#[derive(PartialEq, Eq, Default, Debug)]
pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>);

impl VClock {
/// Create a new vector-clock containing all zeros except
/// for a value at the given index
pub(super) fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock {
if timestamp.time() == 0 {
return VClock::default();
}
let len = index.index() + 1;
let mut vec = smallvec::smallvec![VTimestamp::ZERO; len];
vec[index.index()] = timestamp;
Expand Down
55 changes: 50 additions & 5 deletions src/tools/miri/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,24 +80,42 @@ pub struct FrameExtra<'tcx> {
/// an additional bit of "salt" into the cache key. This salt is fixed per-frame
/// so that within a call, a const will have a stable address.
salt: usize,

/// Data race detector per-frame data.
pub data_race: Option<data_race::FrameState>,
}

impl<'tcx> std::fmt::Debug for FrameExtra<'tcx> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Omitting `timing`, it does not support `Debug`.
let FrameExtra { borrow_tracker, catch_unwind, timing: _, is_user_relevant: _, salt: _ } =
self;
let FrameExtra {
borrow_tracker,
catch_unwind,
timing: _,
is_user_relevant,
salt,
data_race,
} = self;
f.debug_struct("FrameData")
.field("borrow_tracker", borrow_tracker)
.field("catch_unwind", catch_unwind)
.field("is_user_relevant", is_user_relevant)
.field("salt", salt)
.field("data_race", data_race)
.finish()
}
}

impl VisitProvenance for FrameExtra<'_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let FrameExtra { catch_unwind, borrow_tracker, timing: _, is_user_relevant: _, salt: _ } =
self;
let FrameExtra {
catch_unwind,
borrow_tracker,
timing: _,
is_user_relevant: _,
salt: _,
data_race: _,
} = self;

catch_unwind.visit_provenance(visit);
borrow_tracker.visit_provenance(visit);
Expand Down Expand Up @@ -1416,6 +1434,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
timing,
is_user_relevant: ecx.machine.is_user_relevant(&frame),
salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL,
data_race: ecx.machine.data_race.as_ref().map(|_| data_race::FrameState::default()),
};

Ok(frame.with_extra(extra))
Expand Down Expand Up @@ -1521,17 +1540,43 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
res
}

fn after_local_allocated(
fn after_local_read(ecx: &InterpCx<'tcx, Self>, local: mir::Local) -> InterpResult<'tcx> {
if let Some(data_race) = &ecx.frame().extra.data_race {
data_race.local_read(local, &ecx.machine);
}
Ok(())
}

fn after_local_write(
ecx: &mut InterpCx<'tcx, Self>,
local: mir::Local,
storage_live: bool,
) -> InterpResult<'tcx> {
if let Some(data_race) = &ecx.frame().extra.data_race {
data_race.local_write(local, storage_live, &ecx.machine);
}
Ok(())
}

fn after_local_moved_to_memory(
ecx: &mut InterpCx<'tcx, Self>,
local: mir::Local,
mplace: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx> {
let Some(Provenance::Concrete { alloc_id, .. }) = mplace.ptr().provenance else {
panic!("after_local_allocated should only be called on fresh allocations");
};
// Record the span where this was allocated: the declaration of the local.
let local_decl = &ecx.frame().body().local_decls[local];
let span = local_decl.source_info.span;
ecx.machine.allocation_spans.borrow_mut().insert(alloc_id, (span, None));
// The data race system has to fix the clocks used for this write.
let (alloc_info, machine) = ecx.get_alloc_extra_mut(alloc_id)?;
if let Some(data_race) =
&machine.threads.active_thread_stack().last().unwrap().extra.data_race
{
data_race.local_moved_to_memory(local, alloc_info.data_race.as_mut().unwrap(), machine);
}
Ok(())
}

Expand Down
57 changes: 57 additions & 0 deletions src/tools/miri/tests/fail/data_race/local_variable_alloc_race.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
#![feature(core_intrinsics)]
#![feature(custom_mir)]

use std::intrinsics::mir::*;
use std::sync::atomic::Ordering::*;
use std::sync::atomic::*;
use std::thread::JoinHandle;

static P: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());

fn spawn_thread() -> JoinHandle<()> {
std::thread::spawn(|| {
while P.load(Relaxed).is_null() {
std::hint::spin_loop();
}
unsafe {
// Initialize `*P`.
let ptr = P.load(Relaxed);
*ptr = 127;
//~^ ERROR: Data race detected between (1) creating a new allocation on thread `main` and (2) non-atomic write on thread `unnamed-1`
}
})
}

fn finish(t: JoinHandle<()>, val_ptr: *mut u8) {
P.store(val_ptr, Relaxed);

// Wait for the thread to be done.
t.join().unwrap();

// Read initialized value.
assert_eq!(unsafe { *val_ptr }, 127);
}

#[custom_mir(dialect = "runtime", phase = "optimized")]
fn main() {
mir! {
let t;
let val;
let val_ptr;
let _ret;
{
Call(t = spawn_thread(), ReturnTo(after_spawn), UnwindContinue())
}
after_spawn = {
// This races with the write in the other thread.
StorageLive(val);

val_ptr = &raw mut val;
Call(_ret = finish(t, val_ptr), ReturnTo(done), UnwindContinue())
}
done = {
Return()
}
}
}
Loading

0 comments on commit 2894ff7

Please sign in to comment.