Skip to content

Commit

Permalink
Auto merge of #118336 - saethlin:const-to-op-cache, r=RalfJung
Browse files Browse the repository at this point in the history
Return a finite number of AllocIds per ConstAllocation in Miri

Before this, every evaluation of a const slice would produce a new AllocId. So in Miri, this program used to have unbounded memory use:
```rust
fn main() {
    loop {
        helper();
    }
}

fn helper() {
    "ouch";
}
```
Every trip around the loop creates a new AllocId which we need to keep track of a base address for. And the provenance GC can never clean up that AllocId -> u64 mapping, because the AllocId is for a const allocation which will never be deallocated.

So this PR moves the logic of producing an AllocId for a ConstAllocation to the Machine trait, and the implementation that Miri provides will only produce 16 AllocIds for each allocation. The cache is also keyed on the Instance that the const is evaluated in, so that equal consts evaluated in two functions will have disjoint base addresses.

r? RalfJung
  • Loading branch information
bors committed Jan 24, 2024
2 parents f6ee4bf + c8a675d commit 5dd57b2
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 9 deletions.
16 changes: 9 additions & 7 deletions compiler/rustc_const_eval/src/interpret/eval_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1131,13 +1131,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
span: Option<Span>,
layout: Option<TyAndLayout<'tcx>>,
) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> {
let const_val = val.eval(*self.tcx, self.param_env, span).map_err(|err| {
// FIXME: somehow this is reachable even when POST_MONO_CHECKS is on.
// Are we not always populating `required_consts`?
err.emit_note(*self.tcx);
err
})?;
self.const_val_to_op(const_val, val.ty(), layout)
M::eval_mir_constant(self, *val, span, layout, |ecx, val, span, layout| {
let const_val = val.eval(*ecx.tcx, ecx.param_env, span).map_err(|err| {
// FIXME: somehow this is reachable even when POST_MONO_CHECKS is on.
// Are we not always populating `required_consts`?
err.emit_note(*ecx.tcx);
err
})?;
ecx.const_val_to_op(const_val, val.ty(), layout)
})
}

#[must_use]
Expand Down
22 changes: 22 additions & 0 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_span::def_id::DefId;
use rustc_span::Span;
use rustc_target::abi::{Align, Size};
use rustc_target::spec::abi::Abi as CallAbi;

Expand Down Expand Up @@ -510,6 +511,27 @@ pub trait Machine<'mir, 'tcx: 'mir>: Sized {
) -> InterpResult<'tcx> {
Ok(())
}

/// Evaluate the given constant. The `eval` function will do all the required evaluation,
/// but this hook has the chance to do some pre/postprocessing.
#[inline(always)]
fn eval_mir_constant<F>(
ecx: &InterpCx<'mir, 'tcx, Self>,
val: mir::Const<'tcx>,
span: Option<Span>,
layout: Option<TyAndLayout<'tcx>>,
eval: F,
) -> InterpResult<'tcx, OpTy<'tcx, Self::Provenance>>
where
F: Fn(
&InterpCx<'mir, 'tcx, Self>,
mir::Const<'tcx>,
Option<Span>,
Option<TyAndLayout<'tcx>>,
) -> InterpResult<'tcx, OpTy<'tcx, Self::Provenance>>,
{
eval(ecx, val, span, layout)
}
}

/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines
Expand Down
52 changes: 50 additions & 2 deletions src/tools/miri/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::hash_map::Entry;
use std::fmt;
use std::path::Path;
use std::process;

use either::Either;
use rand::rngs::StdRng;
use rand::SeedableRng;
use rand::Rng;

use rustc_ast::ast::Mutability;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
Expand Down Expand Up @@ -45,6 +47,11 @@ pub const SIGRTMIN: i32 = 34;
/// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`)
pub const SIGRTMAX: i32 = 42;

/// Each const has multiple addresses, but only this many. Since const allocations are never
/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would
/// produce unbounded memory usage.
const ADDRS_PER_CONST: usize = 16;

/// Extra data stored with each stack frame
pub struct FrameExtra<'tcx> {
/// Extra data for the Borrow Tracker.
Expand All @@ -65,12 +72,18 @@ pub struct FrameExtra<'tcx> {
/// optimization.
/// This is used by `MiriMachine::current_span` and `MiriMachine::caller_span`
pub is_user_relevant: bool,

/// We have a cache for the mapping from [`mir::Const`] to resulting [`AllocId`].
/// However, we don't want all frames to always get the same result, so we insert
/// an additional bit of "salt" into the cache key. This salt is fixed per-frame
/// so that within a call, a const will have a stable address.
salt: usize,
}

impl<'tcx> std::fmt::Debug for FrameExtra<'tcx> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Omitting `timing`, it does not support `Debug`.
let FrameExtra { borrow_tracker, catch_unwind, timing: _, is_user_relevant: _ } = self;
let FrameExtra { borrow_tracker, catch_unwind, timing: _, is_user_relevant: _, salt: _ } = self;
f.debug_struct("FrameData")
.field("borrow_tracker", borrow_tracker)
.field("catch_unwind", catch_unwind)
Expand All @@ -80,7 +93,7 @@ impl<'tcx> std::fmt::Debug for FrameExtra<'tcx> {

impl VisitProvenance for FrameExtra<'_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let FrameExtra { catch_unwind, borrow_tracker, timing: _, is_user_relevant: _ } = self;
let FrameExtra { catch_unwind, borrow_tracker, timing: _, is_user_relevant: _, salt: _ } = self;

catch_unwind.visit_provenance(visit);
borrow_tracker.visit_provenance(visit);
Expand Down Expand Up @@ -552,6 +565,11 @@ pub struct MiriMachine<'mir, 'tcx> {
/// The spans we will use to report where an allocation was created and deallocated in
/// diagnostics.
pub(crate) allocation_spans: RefCell<FxHashMap<AllocId, (Span, Option<Span>)>>,

/// Maps MIR consts to their evaluated result. We combine the const with a "salt" (`usize`)
/// that is fixed per stack frame; this lets us have sometimes different results for the
/// same const while ensuring consistent results within a single call.
const_cache: RefCell<FxHashMap<(mir::Const<'tcx>, usize), OpTy<'tcx, Provenance>>>,
}

impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
Expand Down Expand Up @@ -677,6 +695,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
stack_size,
collect_leak_backtraces: config.collect_leak_backtraces,
allocation_spans: RefCell::new(FxHashMap::default()),
const_cache: RefCell::new(FxHashMap::default()),
}
}

Expand Down Expand Up @@ -857,6 +876,7 @@ impl VisitProvenance for MiriMachine<'_, '_> {
stack_size: _,
collect_leak_backtraces: _,
allocation_spans: _,
const_cache: _,
} = self;

threads.visit_provenance(visit);
Expand Down Expand Up @@ -1400,6 +1420,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
catch_unwind: None,
timing,
is_user_relevant: ecx.machine.is_user_relevant(&frame),
salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_CONST,
};

Ok(frame.with_extra(extra))
Expand Down Expand Up @@ -1505,4 +1526,31 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx.machine.allocation_spans.borrow_mut().insert(alloc_id, (span, None));
Ok(())
}

fn eval_mir_constant<F>(
ecx: &InterpCx<'mir, 'tcx, Self>,
val: mir::Const<'tcx>,
span: Option<Span>,
layout: Option<TyAndLayout<'tcx>>,
eval: F,
) -> InterpResult<'tcx, OpTy<'tcx, Self::Provenance>>
where
F: Fn(
&InterpCx<'mir, 'tcx, Self>,
mir::Const<'tcx>,
Option<Span>,
Option<TyAndLayout<'tcx>>,
) -> InterpResult<'tcx, OpTy<'tcx, Self::Provenance>>,
{
let frame = ecx.active_thread_stack().last().unwrap();
let mut cache = ecx.machine.const_cache.borrow_mut();
match cache.entry((val, frame.extra.salt)) {
Entry::Vacant(ve) => {
let op = eval(ecx, val, span, layout)?;
ve.insert(op.clone());
Ok(op)
}
Entry::Occupied(oe) => Ok(oe.get().clone()),
}
}
}
38 changes: 38 additions & 0 deletions src/tools/miri/tests/pass/const-addrs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// The const fn interpreter creates a new AllocId every time it evaluates any const.
// If we do that in Miri, repeatedly evaluating a const causes unbounded memory use
// we need to keep track of the base address for that AllocId, and the allocation is never
// deallocated.
// In Miri we explicitly store previously-assigned AllocIds for each const and ensure
// that we only hand out a finite number of AllocIds per const.
// MIR inlining will put every evaluation of the const we're repeatedly evaluting into the same
// stack frame, breaking this test.
//@compile-flags: -Zinline-mir=no
#![feature(strict_provenance)]

const EVALS: usize = 256;

use std::collections::HashSet;
fn main() {
let mut addrs = HashSet::new();
for _ in 0..EVALS {
addrs.insert(const_addr());
}
// Check that the const allocation has multiple base addresses
assert!(addrs.len() > 1);
// But also that we get a limited number of unique base addresses
assert!(addrs.len() < EVALS);

// Check that within a call we always produce the same address
let mut prev = 0;
for iter in 0..EVALS {
let addr = "test".as_bytes().as_ptr().addr();
if iter > 0 {
assert_eq!(prev, addr);
}
prev = addr;
}
}

fn const_addr() -> usize {
"test".as_bytes().as_ptr().addr()
}

0 comments on commit 5dd57b2

Please sign in to comment.