diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 3e9d29df02cdf..19e36442dce19 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -17,10 +17,7 @@ use rustc_ast::expand::allocator::AllocatorKind; use rustc_attr as attr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::profiling::{get_resident_set_size, print_time_passes_entry}; - -use rustc_data_structures::sync::par_iter; -#[cfg(parallel_compiler)] -use rustc_data_structures::sync::ParallelIterator; +use rustc_data_structures::sync::par_map; use rustc_hir as hir; use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_hir::lang_items::LangItem; @@ -682,7 +679,7 @@ pub fn codegen_crate( // This likely is a temporary measure. Once we don't have to support the // non-parallel compiler anymore, we can compile CGUs end-to-end in // parallel and get rid of the complicated scheduling logic. - let mut pre_compiled_cgus = if cfg!(parallel_compiler) { + let mut pre_compiled_cgus = if rustc_data_structures::sync::active() { tcx.sess.time("compile_first_CGU_batch", || { // Try to find one CGU to compile per thread. let cgus: Vec<_> = cgu_reuse @@ -695,12 +692,10 @@ pub fn codegen_crate( // Compile the found CGUs in parallel. let start_time = Instant::now(); - let pre_compiled_cgus = par_iter(cgus) - .map(|(i, _)| { - let module = backend.compile_codegen_unit(tcx, codegen_units[i].name()); - (i, module) - }) - .collect(); + let pre_compiled_cgus = par_map(cgus, |(i, _)| { + let module = backend.compile_codegen_unit(tcx, codegen_units[i].name()); + (i, module) + }); total_codegen_time += start_time.elapsed(); diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml index 39f4bc63c88d1..06ce1adc685c9 100644 --- a/compiler/rustc_data_structures/Cargo.toml +++ b/compiler/rustc_data_structures/Cargo.toml @@ -10,11 +10,11 @@ arrayvec = { version = "0.7", default-features = false } bitflags = "1.2.1" cfg-if = "1.0" ena = "0.14.2" -indexmap = { version = "1.9.3" } +indexmap = { version = "1.9.3", features = ["rustc-rayon"] } jobserver_crate = { version = "0.1.13", package = "jobserver" } libc = "0.2" measureme = "10.0.0" -rustc-rayon-core = { version = "0.5.0", optional = true } +rustc-rayon-core = { version = "0.5.0" } rustc-rayon = { version = "0.5.0", optional = true } rustc_graphviz = { path = "../rustc_graphviz" } rustc-hash = "1.1.0" @@ -51,4 +51,4 @@ features = [ memmap2 = "0.2.1" [features] -rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rustc-rayon", "rustc-rayon-core"] +rustc_use_parallel_compiler = ["rustc-rayon"] \ No newline at end of file diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index bd7a86f67800f..f720e5110a93d 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -1,5 +1,5 @@ use crate::fx::{FxHashMap, FxHasher}; -use crate::sync::{Lock, LockGuard}; +use crate::sync::{active, Lock, LockGuard}; use std::borrow::Borrow; use std::collections::hash_map::RawEntryMut; use std::hash::{Hash, Hasher}; @@ -9,20 +9,17 @@ use std::mem; #[cfg_attr(parallel_compiler, repr(align(64)))] struct CacheAligned(T); -#[cfg(parallel_compiler)] // 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700, // but this should be tested on higher core count CPUs. How the `Sharded` type gets used // may also affect the ideal number of shards. 
const SHARD_BITS: usize = 5; -#[cfg(not(parallel_compiler))] -const SHARD_BITS: usize = 0; - pub const SHARDS: usize = 1 << SHARD_BITS; /// An array of cache-line aligned inner locked structures with convenience methods. pub struct Sharded { shards: [CacheAligned>; SHARDS], + single_thread: bool, } impl Default for Sharded { @@ -35,31 +32,41 @@ impl Default for Sharded { impl Sharded { #[inline] pub fn new(mut value: impl FnMut() -> T) -> Self { - Sharded { shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))) } + Sharded { + shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))), + single_thread: !active(), + } } /// The shard is selected by hashing `val` with `FxHasher`. #[inline] pub fn get_shard_by_value(&self, val: &K) -> &Lock { - if SHARDS == 1 { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) } + if self.single_thread { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) } } #[inline] pub fn get_shard_by_hash(&self, hash: u64) -> &Lock { - &self.shards[get_shard_index_by_hash(hash)].0 - } - - #[inline] - pub fn get_shard_by_index(&self, i: usize) -> &Lock { - &self.shards[i].0 + if self.single_thread { + &self.shards[0].0 + } else { + &self.shards[get_shard_index_by_hash(hash)].0 + } } pub fn lock_shards(&self) -> Vec> { - (0..SHARDS).map(|i| self.shards[i].0.lock()).collect() + if self.single_thread { + vec![self.shards[0].0.lock()] + } else { + (0..SHARDS).map(|i| self.shards[i].0.lock()).collect() + } } pub fn try_lock_shards(&self) -> Option>> { - (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect() + if self.single_thread { + Some(vec![self.shards[0].0.try_lock()?]) + } else { + (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect() + } } } @@ -141,7 +148,7 @@ pub fn make_hash(val: &K) -> u64 { /// consistently for each `Sharded` instance. #[inline] #[allow(clippy::modulo_one)] -pub fn get_shard_index_by_hash(hash: u64) -> usize { +fn get_shard_index_by_hash(hash: u64) -> usize { let hash_len = mem::size_of::(); // Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits. // hashbrown also uses the lowest bits, so we can't use those diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index ef1da85198fd4..534ba7f9c15e4 100644 --- a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -40,8 +40,13 @@ //! [^2] `MTLockRef` is a typedef. use crate::owned_slice::OwnedSlice; +use std::cell::{Cell, UnsafeCell}; use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; use std::hash::{BuildHasher, Hash}; +use std::intrinsics::likely; +use std::marker::PhantomData; +use std::mem::MaybeUninit; use std::ops::{Deref, DerefMut}; use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe}; @@ -51,91 +56,49 @@ pub use std::sync::atomic::Ordering::SeqCst; pub use vec::{AppendOnlyIndexVec, AppendOnlyVec}; mod vec; +use parking_lot::lock_api::RawMutex as _; +use parking_lot::RawMutex; -cfg_if! { - if #[cfg(not(parallel_compiler))] { - pub unsafe auto trait Send {} - pub unsafe auto trait Sync {} - - unsafe impl Send for T {} - unsafe impl Sync for T {} - - use std::ops::Add; - - /// This is a single threaded variant of `AtomicU64`, `AtomicUsize`, etc. - /// It has explicit ordering arguments and is only intended for use with - /// the native atomic types. - /// You should use this type through the `AtomicU64`, `AtomicUsize`, etc, type aliases - /// as it's not intended to be used separately. 
- #[derive(Debug, Default)] - pub struct Atomic(Cell); +mod mode { + use super::Ordering; + use std::sync::atomic::AtomicU8; - impl Atomic { - #[inline] - pub fn new(v: T) -> Self { - Atomic(Cell::new(v)) - } + const UNINITIALIZED: u8 = 0; + const INACTIVE: u8 = 1; + const ACTIVE: u8 = 2; - #[inline] - pub fn into_inner(self) -> T { - self.0.into_inner() - } + static MODE: AtomicU8 = AtomicU8::new(UNINITIALIZED); - #[inline] - pub fn load(&self, _: Ordering) -> T { - self.0.get() - } - - #[inline] - pub fn store(&self, val: T, _: Ordering) { - self.0.set(val) - } - - #[inline] - pub fn swap(&self, val: T, _: Ordering) -> T { - self.0.replace(val) - } + #[inline] + pub fn active() -> bool { + match MODE.load(Ordering::Relaxed) { + INACTIVE => false, + ACTIVE => true, + // Should panic here. Just for speed test. + // _ => panic!("uninitialized parallel mode!"), + _ => false, } + } - impl Atomic { - pub fn fetch_or(&self, val: bool, _: Ordering) -> bool { - let result = self.0.get() | val; - self.0.set(val); - result - } - } + // Only set by the `-Z threads` compile option + pub fn set(parallel: bool) { + let set: u8 = if parallel { ACTIVE } else { INACTIVE }; + let previous = + MODE.compare_exchange(UNINITIALIZED, set, Ordering::Relaxed, Ordering::Relaxed); - impl Atomic { - #[inline] - pub fn compare_exchange(&self, - current: T, - new: T, - _: Ordering, - _: Ordering) - -> Result { - let read = self.0.get(); - if read == current { - self.0.set(new); - Ok(read) - } else { - Err(read) - } - } - } + // Check that the mode was either uninitialized or was already set to the requested mode. + assert!(previous.is_ok() || previous == Err(set)); + } +} - impl + Copy> Atomic { - #[inline] - pub fn fetch_add(&self, val: T, _: Ordering) -> T { - let old = self.0.get(); - self.0.set(old + val); - old - } - } +pub use mode::{active, set}; +cfg_if! { + if #[cfg(not(parallel_compiler))] { + pub unsafe auto trait Send {} + pub unsafe auto trait Sync {} - pub type AtomicUsize = Atomic; - pub type AtomicBool = Atomic; - pub type AtomicU32 = Atomic; - pub type AtomicU64 = Atomic; + unsafe impl Send for T {} + unsafe impl Sync for T {} pub fn join(oper_a: A, oper_b: B) -> (RA, RB) where A: FnOnce() -> RA, @@ -146,7 +109,7 @@ cfg_if! { #[macro_export] macro_rules! parallel { - ($($blocks:tt),*) => { + ($($blocks:block),*) => { // We catch panics here ensuring that all the blocks execute. // This makes behavior consistent with the parallel compiler. let mut panic = None; @@ -165,12 +128,6 @@ cfg_if! { } } - pub use Iterator as ParallelIterator; - - pub fn par_iter(t: T) -> T::IntoIter { - t.into_iter() - } - pub fn par_for_each_in(t: T, mut for_each: impl FnMut(T::Item) + Sync + Send) { // We catch panics here ensuring that all the loop iterations execute. // This makes behavior consistent with the parallel compiler. @@ -187,194 +144,290 @@ cfg_if! 
{ } } - pub type MetadataRef = OwnedSlice; - - pub use std::rc::Rc as Lrc; - pub use std::rc::Weak as Weak; - pub use std::cell::Ref as ReadGuard; - pub use std::cell::Ref as MappedReadGuard; - pub use std::cell::RefMut as WriteGuard; - pub use std::cell::RefMut as MappedWriteGuard; - pub use std::cell::RefMut as LockGuard; - pub use std::cell::RefMut as MappedLockGuard; - - pub use std::cell::OnceCell; - - use std::cell::RefCell as InnerRwLock; - use std::cell::RefCell as InnerLock; - - use std::cell::Cell; - - #[derive(Debug)] - pub struct WorkerLocal(OneThread); - - impl WorkerLocal { - /// Creates a new worker local where the `initial` closure computes the - /// value this worker local should take for each thread in the thread pool. - #[inline] - pub fn new T>(mut f: F) -> WorkerLocal { - WorkerLocal(OneThread::new(f(0))) - } - - /// Returns the worker-local value for each thread - #[inline] - pub fn into_inner(self) -> Vec { - vec![OneThread::into_inner(self.0)] + pub fn par_map>( + t: T, + mut map: impl FnMut(<::IntoIter as Iterator>::Item) -> R, + ) -> C { + // We catch panics here ensuring that all the loop iterations execute. + let mut panic = None; + let r = t.into_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + if panic.is_none() { + panic = Some(p); + } + None + } + } + }).collect(); + if let Some(panic) = panic { + resume_unwind(panic); } + r } - impl Deref for WorkerLocal { - type Target = T; + pub type MetadataRef = OwnedSlice; + } else { + pub use std::marker::Send as Send; + pub use std::marker::Sync as Sync; - #[inline(always)] - fn deref(&self) -> &T { - &self.0 + #[inline] + pub fn join(oper_a: A, oper_b: B) -> (RA, RB) + where + A: FnOnce() -> RA + DynSend, + B: FnOnce() -> RB + DynSend, + { + if mode::active() { + let oper_a = FromDyn::from(oper_a); + let oper_b = FromDyn::from(oper_b); + let (a, b) = rayon::join(move || FromDyn::from(oper_a.into_inner()()), move || FromDyn::from(oper_b.into_inner()())); + (a.into_inner(), b.into_inner()) + } else { + (oper_a(), oper_b()) } } - pub type MTLockRef<'a, T> = &'a mut MTLock; + // This function only works when `mode::active()`. + pub fn scope<'scope, OP, R>(op: OP) -> R + where + OP: FnOnce(&rayon::Scope<'scope>) -> R + DynSend, + R: DynSend, + { + let op = FromDyn::from(op); + rayon::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() + } - #[derive(Debug, Default)] - pub struct MTLock(T); + /// Runs a list of blocks in parallel. The first block is executed immediately on + /// the current thread. Use that for the longest running block. + #[macro_export] + macro_rules! parallel { + (impl $fblock:block [$($c:expr,)*] [$block:expr $(, $rest:expr)*]) => { + parallel!(impl $fblock [$block, $($c,)*] [$($rest),*]) + }; + (impl $fblock:block [$($blocks:expr,)*] []) => { + ::rustc_data_structures::sync::scope(|s| { + $(let block = rustc_data_structures::sync::FromDyn::from(|| $blocks); + s.spawn(move |_| block.into_inner()());)* + (|| $fblock)(); + }); + }; + ($fblock:block, $($blocks:block),*) => { + if rustc_data_structures::sync::active() { + // Reverse the order of the later blocks since Rayon executes them in reverse order + // when using a single thread. This ensures the execution order matches that + // of a single threaded rustc + parallel!(impl $fblock [] [$($blocks),*]); + } else { + // We catch panics here ensuring that all the blocks execute. + // This makes behavior consistent with the parallel compiler. 
+ let mut panic = None; + if let Err(p) = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| $fblock) + ) { + if panic.is_none() { + panic = Some(p); + } + } + $( + if let Err(p) = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| $blocks) + ) { + if panic.is_none() { + panic = Some(p); + } + } + )* + if let Some(panic) = panic { + ::std::panic::resume_unwind(panic); + } + } + }; + } - impl MTLock { - #[inline(always)] - pub fn new(inner: T) -> Self { - MTLock(inner) - } + use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator}; - #[inline(always)] - pub fn into_inner(self) -> T { - self.0 - } + pub fn par_for_each_in + IntoParallelIterator>( + t: T, + for_each: impl Fn(I) + DynSync + DynSend + ) { + if mode::active() { + let for_each = FromDyn::from(for_each); + let panic: Lock> = Lock::new(None); + t.into_par_iter().for_each(|i| if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) { + let mut l = panic.lock(); + if l.is_none() { + *l = Some(p) + } + }); - #[inline(always)] - pub fn get_mut(&mut self) -> &mut T { - &mut self.0 + if let Some(panic) = panic.into_inner() { + resume_unwind(panic); + } + } else { + // We catch panics here ensuring that all the loop iterations execute. + // This makes behavior consistent with the parallel compiler. + let mut panic = None; + t.into_iter().for_each(|i| { + if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) { + if panic.is_none() { + panic = Some(p); + } + } + }); + if let Some(panic) = panic { + resume_unwind(panic); + } } + } - #[inline(always)] - pub fn lock(&self) -> &T { - &self.0 - } + pub fn par_map< + I, + T: IntoIterator + IntoParallelIterator, + R: std::marker::Send, + C: FromIterator + FromParallelIterator + >( + t: T, + map: impl Fn(I) -> R + DynSync + DynSend + ) -> C { + if mode::active() { + let panic: Lock> = Lock::new(None); + let map = FromDyn::from(map); + // We catch panics here ensuring that all the loop iterations execute. + let r = t.into_par_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + let mut l = panic.lock(); + if l.is_none() { + *l = Some(p); + } + None + }, + } + }).collect(); - #[inline(always)] - pub fn lock_mut(&mut self) -> &mut T { - &mut self.0 + if let Some(panic) = panic.into_inner() { + resume_unwind(panic); + } + r + } else { + // We catch panics here ensuring that all the loop iterations execute. 
+ let mut panic = None; + let r = t.into_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + if panic.is_none() { + panic = Some(p); + } + None + } + } + }).collect(); + if let Some(panic) = panic { + resume_unwind(panic); + } + r } } - // FIXME: Probably a bad idea (in the threaded case) - impl Clone for MTLock { - #[inline] - fn clone(&self) -> Self { - MTLock(self.0.clone()) - } - } - } else { - pub use std::marker::Send as Send; - pub use std::marker::Sync as Sync; + pub type MetadataRef = OwnedSlice; - pub use parking_lot::RwLockReadGuard as ReadGuard; - pub use parking_lot::MappedRwLockReadGuard as MappedReadGuard; - pub use parking_lot::RwLockWriteGuard as WriteGuard; - pub use parking_lot::MappedRwLockWriteGuard as MappedWriteGuard; + } +} - pub use parking_lot::MutexGuard as LockGuard; - pub use parking_lot::MappedMutexGuard as MappedLockGuard; +pub use std::sync::Arc as Lrc; - pub use std::sync::OnceLock as OnceCell; +use std::thread; - pub use std::sync::atomic::{AtomicBool, AtomicUsize, AtomicU32, AtomicU64}; +use parking_lot::RwLock as InnerRwLock; - pub use std::sync::Arc as Lrc; - pub use std::sync::Weak as Weak; +pub use parking_lot::MappedRwLockReadGuard as MappedReadGuard; +pub use parking_lot::MappedRwLockWriteGuard as MappedWriteGuard; +pub use parking_lot::RwLockReadGuard as ReadGuard; +pub use parking_lot::RwLockWriteGuard as WriteGuard; - pub type MTLockRef<'a, T> = &'a MTLock; +pub use std::sync::OnceLock as OnceCell; - #[derive(Debug, Default)] - pub struct MTLock(Lock); +pub use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize}; - impl MTLock { - #[inline(always)] - pub fn new(inner: T) -> Self { - MTLock(Lock::new(inner)) - } +pub type MTLockRef<'a, T> = &'a MTLock; - #[inline(always)] - pub fn into_inner(self) -> T { - self.0.into_inner() - } +#[derive(Debug, Default)] +pub struct MTLock(Lock); - #[inline(always)] - pub fn get_mut(&mut self) -> &mut T { - self.0.get_mut() - } +impl MTLock { + #[inline(always)] + pub fn new(inner: T) -> Self { + MTLock(Lock::new(inner)) + } - #[inline(always)] - pub fn lock(&self) -> LockGuard<'_, T> { - self.0.lock() - } + #[inline(always)] + pub fn into_inner(self) -> T { + self.0.into_inner() + } - #[inline(always)] - pub fn lock_mut(&self) -> LockGuard<'_, T> { - self.lock() - } - } + #[inline(always)] + pub fn get_mut(&mut self) -> &mut T { + self.0.get_mut() + } - use parking_lot::Mutex as InnerLock; - use parking_lot::RwLock as InnerRwLock; + #[inline(always)] + pub fn lock(&self) -> LockGuard<'_, T> { + self.0.lock() + } - use std::thread; - pub use rayon::{join, scope}; + #[inline(always)] + pub fn lock_mut(&self) -> LockGuard<'_, T> { + self.lock() + } +} - /// Runs a list of blocks in parallel. The first block is executed immediately on - /// the current thread. Use that for the longest running block. - #[macro_export] - macro_rules! parallel { - (impl $fblock:tt [$($c:tt,)*] [$block:tt $(, $rest:tt)*]) => { - parallel!(impl $fblock [$block, $($c,)*] [$($rest),*]) - }; - (impl $fblock:tt [$($blocks:tt,)*] []) => { - ::rustc_data_structures::sync::scope(|s| { - $( - s.spawn(|_| $blocks); - )* - $fblock; - }) - }; - ($fblock:tt, $($blocks:tt),*) => { - // Reverse the order of the later blocks since Rayon executes them in reverse order - // when using a single thread. 
This ensures the execution order matches that - // of a single threaded rustc - parallel!(impl $fblock [] [$($blocks),*]); - }; - } +/// This makes locks panic if they are already held. +/// It is only useful when you are running in a single thread +const ERROR_CHECKING: bool = false; - pub use rayon_core::WorkerLocal; +pub unsafe trait DynSend {} +pub unsafe trait DynSync {} - pub use rayon::iter::ParallelIterator; - use rayon::iter::IntoParallelIterator; +unsafe impl DynSend for T where T: Send {} +unsafe impl DynSync for T where T: Sync {} - pub fn par_iter(t: T) -> T::Iter { - t.into_par_iter() - } +#[derive(Copy, Clone)] +pub struct FromDyn(T); - pub fn par_for_each_in( - t: T, - for_each: impl Fn(T::Item) + Sync + Send, - ) { - let ps: Vec<_> = t.into_par_iter().map(|i| catch_unwind(AssertUnwindSafe(|| for_each(i)))).collect(); - ps.into_iter().for_each(|p| if let Err(panic) = p { - resume_unwind(panic) - }); - } +impl FromDyn { + #[inline(always)] + pub fn from(val: T) -> Self { + // Check that `sync::active()` is true on creation so we can + // implement `Send` and `Sync` for this structure when `T` + // implements `DynSend` and `DynSync` respectively. + #[cfg(parallel_compiler)] + assert!(mode::active()); + FromDyn(val) + } - pub type MetadataRef = OwnedSlice; + #[inline(always)] + pub fn into_inner(self) -> T { + self.0 + } +} + +// `FromDyn` is `Send` if `T` is `DynSend`, since it ensures that sync::active() is true. +#[cfg(parallel_compiler)] +unsafe impl Send for FromDyn {} + +// `FromDyn` is `Sync` if `T` is `DynSync`, since it ensures that sync::active() is true. +#[cfg(parallel_compiler)] +unsafe impl Sync for FromDyn {} + +impl std::ops::Deref for FromDyn { + type Target = T; - /// This makes locks panic if they are already held. - /// It is only useful when you are running in a single thread - const ERROR_CHECKING: bool = false; + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0 } } @@ -395,53 +448,86 @@ impl HashMapExt for HashMap } } -#[derive(Debug)] -pub struct Lock(InnerLock); +pub struct Lock { + single_thread: bool, + data: UnsafeCell, + borrow: Cell, + mutex: RawMutex, +} + +impl Debug for Lock { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.try_lock() { + Some(guard) => f.debug_struct("Lock").field("data", &&*guard).finish(), + None => { + struct LockedPlaceholder; + impl Debug for LockedPlaceholder { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("") + } + } + + f.debug_struct("Lock").field("data", &LockedPlaceholder).finish() + } + } + } +} impl Lock { - #[inline(always)] - pub fn new(inner: T) -> Self { - Lock(InnerLock::new(inner)) + #[inline] + pub fn new(val: T) -> Self { + Lock { + single_thread: !active(), + data: UnsafeCell::new(val), + borrow: Cell::new(false), + mutex: RawMutex::INIT, + } } - #[inline(always)] + #[inline] pub fn into_inner(self) -> T { - self.0.into_inner() + self.data.into_inner() } - #[inline(always)] + #[inline] pub fn get_mut(&mut self) -> &mut T { - self.0.get_mut() + self.data.get_mut() } - #[cfg(parallel_compiler)] - #[inline(always)] - pub fn try_lock(&self) -> Option> { - self.0.try_lock() - } - - #[cfg(not(parallel_compiler))] - #[inline(always)] + #[inline] pub fn try_lock(&self) -> Option> { - self.0.try_borrow_mut().ok() + // SAFETY: the `&mut T` is accessible as long as self exists. 
+ if likely(self.single_thread) { + if self.borrow.get() { + None + } else { + self.borrow.set(true); + Some(LockGuard { lock: &self, marker: PhantomData }) + } + } else { + if !self.mutex.try_lock() { + None + } else { + Some(LockGuard { lock: &self, marker: PhantomData }) + } + } } - #[cfg(parallel_compiler)] - #[inline(always)] - #[track_caller] - pub fn lock(&self) -> LockGuard<'_, T> { - if ERROR_CHECKING { - self.0.try_lock().expect("lock was already held") + #[inline(never)] + fn lock_raw(&self) { + if likely(self.single_thread) { + assert!(!self.borrow.get()); + self.borrow.set(true); } else { - self.0.lock() + self.mutex.lock(); } } - #[cfg(not(parallel_compiler))] #[inline(always)] #[track_caller] pub fn lock(&self) -> LockGuard<'_, T> { - self.0.borrow_mut() + self.lock_raw(); + LockGuard { lock: &self, marker: PhantomData } } #[inline(always)] @@ -470,6 +556,48 @@ impl Default for Lock { } } +// Just for speed test +unsafe impl std::marker::Send for Lock {} +unsafe impl std::marker::Sync for Lock {} + +pub struct LockGuard<'a, T> { + lock: &'a Lock, + marker: PhantomData<&'a mut T>, +} + +impl Deref for LockGuard<'_, T> { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + unsafe { &*self.lock.data.get() } + } +} + +impl DerefMut for LockGuard<'_, T> { + #[inline(always)] + fn deref_mut(&mut self) -> &mut T { + unsafe { &mut *self.lock.data.get() } + } +} + +#[inline(never)] +fn unlock_mt(guard: &mut LockGuard<'_, T>) { + unsafe { guard.lock.mutex.unlock() } +} + +impl<'a, T> Drop for LockGuard<'a, T> { + #[inline] + fn drop(&mut self) { + if likely(self.lock.single_thread) { + debug_assert!(self.lock.borrow.get()); + self.lock.borrow.set(false); + } else { + unlock_mt(self) + } + } +} + #[derive(Debug, Default)] pub struct RwLock(InnerRwLock); @@ -489,14 +617,6 @@ impl RwLock { self.0.get_mut() } - #[cfg(not(parallel_compiler))] - #[inline(always)] - #[track_caller] - pub fn read(&self) -> ReadGuard<'_, T> { - self.0.borrow() - } - - #[cfg(parallel_compiler)] #[inline(always)] pub fn read(&self) -> ReadGuard<'_, T> { if ERROR_CHECKING { @@ -512,26 +632,11 @@ impl RwLock { f(&*self.read()) } - #[cfg(not(parallel_compiler))] - #[inline(always)] - pub fn try_write(&self) -> Result, ()> { - self.0.try_borrow_mut().map_err(|_| ()) - } - - #[cfg(parallel_compiler)] #[inline(always)] pub fn try_write(&self) -> Result, ()> { self.0.try_write().ok_or(()) } - #[cfg(not(parallel_compiler))] - #[inline(always)] - #[track_caller] - pub fn write(&self) -> WriteGuard<'_, T> { - self.0.borrow_mut() - } - - #[cfg(parallel_compiler)] #[inline(always)] pub fn write(&self) -> WriteGuard<'_, T> { if ERROR_CHECKING { @@ -559,13 +664,6 @@ impl RwLock { self.write() } - #[cfg(not(parallel_compiler))] - #[inline(always)] - pub fn leak(&self) -> &T { - ReadGuard::leak(self.read()) - } - - #[cfg(parallel_compiler)] #[inline(always)] pub fn leak(&self) -> &T { let guard = self.read(); @@ -583,34 +681,68 @@ impl Clone for RwLock { } } +#[derive(Debug)] +pub struct WorkerLocal { + single_thread: bool, + inner: T, + mt_inner: Option>, +} + +impl WorkerLocal { + /// Creates a new worker local where the `initial` closure computes the + /// value this worker local should take for each thread in the thread pool. 
+ #[inline] + pub fn new T>(mut f: F) -> WorkerLocal { + if !active() { + WorkerLocal { single_thread: true, inner: f(0), mt_inner: None } + } else { + WorkerLocal { + single_thread: false, + inner: unsafe { MaybeUninit::uninit().assume_init() }, + mt_inner: Some(rayon_core::WorkerLocal::new(f)), + } + } + } + + /// Returns the worker-local value for each thread + #[inline] + pub fn into_inner(self) -> Vec { + if self.single_thread { vec![self.inner] } else { self.mt_inner.unwrap().into_inner() } + } +} + +impl Deref for WorkerLocal { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + if self.single_thread { &self.inner } else { self.mt_inner.as_ref().unwrap().deref() } + } +} + +// Just for speed test +unsafe impl std::marker::Sync for WorkerLocal {} + /// A type which only allows its inner value to be used in one thread. /// It will panic if it is used on multiple threads. #[derive(Debug)] pub struct OneThread { - #[cfg(parallel_compiler)] thread: thread::ThreadId, inner: T, } -#[cfg(parallel_compiler)] unsafe impl std::marker::Sync for OneThread {} -#[cfg(parallel_compiler)] unsafe impl std::marker::Send for OneThread {} impl OneThread { #[inline(always)] fn check(&self) { - #[cfg(parallel_compiler)] assert_eq!(thread::current().id(), self.thread); } #[inline(always)] pub fn new(inner: T) -> Self { - OneThread { - #[cfg(parallel_compiler)] - thread: thread::current().id(), - inner, - } + OneThread { thread: thread::current().id(), inner } } #[inline(always)] diff --git a/compiler/rustc_data_structures/src/sync/vec.rs b/compiler/rustc_data_structures/src/sync/vec.rs index 1783b4b357257..f3430b8c736d3 100644 --- a/compiler/rustc_data_structures/src/sync/vec.rs +++ b/compiler/rustc_data_structures/src/sync/vec.rs @@ -4,39 +4,22 @@ use rustc_index::vec::Idx; #[derive(Default)] pub struct AppendOnlyIndexVec { - #[cfg(not(parallel_compiler))] - vec: elsa::vec::FrozenVec, - #[cfg(parallel_compiler)] vec: elsa::sync::LockFreeFrozenVec, _marker: PhantomData, } impl AppendOnlyIndexVec { pub fn new() -> Self { - Self { - #[cfg(not(parallel_compiler))] - vec: elsa::vec::FrozenVec::new(), - #[cfg(parallel_compiler)] - vec: elsa::sync::LockFreeFrozenVec::new(), - _marker: PhantomData, - } + Self { vec: elsa::sync::LockFreeFrozenVec::new(), _marker: PhantomData } } pub fn push(&self, val: T) -> I { - #[cfg(not(parallel_compiler))] - let i = self.vec.len(); - #[cfg(not(parallel_compiler))] - self.vec.push(val); - #[cfg(parallel_compiler)] let i = self.vec.push(val); I::new(i) } pub fn get(&self, i: I) -> Option { let i = i.index(); - #[cfg(not(parallel_compiler))] - return self.vec.get_copy(i); - #[cfg(parallel_compiler)] return self.vec.get(i); } } diff --git a/compiler/rustc_error_messages/Cargo.toml b/compiler/rustc_error_messages/Cargo.toml index 481c94266f271..19e62918aaadc 100644 --- a/compiler/rustc_error_messages/Cargo.toml +++ b/compiler/rustc_error_messages/Cargo.toml @@ -9,7 +9,7 @@ edition = "2021" fluent-bundle = "0.15.2" fluent-syntax = "0.11" intl-memoizer = "0.5.1" -rustc_baked_icu_data = { path = "../rustc_baked_icu_data" } +rustc_baked_icu_data = { path = "../rustc_baked_icu_data", features = ["rustc_use_parallel_compiler"] } rustc_data_structures = { path = "../rustc_data_structures" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_serialize = { path = "../rustc_serialize" } @@ -22,4 +22,4 @@ icu_locid = "1.1.0" icu_provider_adapters = "1.1.0" [features] -rustc_use_parallel_compiler = ['rustc_baked_icu_data/rustc_use_parallel_compiler'] 
+rustc_use_parallel_compiler = [] diff --git a/compiler/rustc_error_messages/src/lib.rs b/compiler/rustc_error_messages/src/lib.rs index 88d94c93bf520..e5da8f2b2c8e4 100644 --- a/compiler/rustc_error_messages/src/lib.rs +++ b/compiler/rustc_error_messages/src/lib.rs @@ -22,15 +22,9 @@ use std::fs; use std::io; use std::path::{Path, PathBuf}; -#[cfg(not(parallel_compiler))] -use std::cell::LazyCell as Lazy; -#[cfg(parallel_compiler)] use std::sync::LazyLock as Lazy; -#[cfg(parallel_compiler)] use intl_memoizer::concurrent::IntlLangMemoizer; -#[cfg(not(parallel_compiler))] -use intl_memoizer::IntlLangMemoizer; pub use fluent_bundle::{self, types::FluentType, FluentArgs, FluentError, FluentValue}; pub use unic_langid::{langid, LanguageIdentifier}; @@ -39,16 +33,10 @@ fluent_messages! { "../messages.ftl" } pub type FluentBundle = fluent_bundle::bundle::FluentBundle; -#[cfg(parallel_compiler)] fn new_bundle(locales: Vec) -> FluentBundle { FluentBundle::new_concurrent(locales) } -#[cfg(not(parallel_compiler))] -fn new_bundle(locales: Vec) -> FluentBundle { - FluentBundle::new(locales) -} - #[derive(Debug)] pub enum TranslationBundleError { /// Failed to read from `.ftl` file. @@ -547,15 +535,6 @@ pub fn fluent_value_from_str_list_sep_by_and(l: Vec>) -> FluentValu Cow::Owned(result) } - #[cfg(not(parallel_compiler))] - fn as_string_threadsafe( - &self, - _intls: &intl_memoizer::concurrent::IntlLangMemoizer, - ) -> Cow<'static, str> { - unreachable!("`as_string_threadsafe` is not used in non-parallel rustc") - } - - #[cfg(parallel_compiler)] fn as_string_threadsafe( &self, intls: &intl_memoizer::concurrent::IntlLangMemoizer, diff --git a/compiler/rustc_errors/src/tests.rs b/compiler/rustc_errors/src/tests.rs index 52103e4609770..8d029eb6e9429 100644 --- a/compiler/rustc_errors/src/tests.rs +++ b/compiler/rustc_errors/src/tests.rs @@ -26,12 +26,8 @@ fn make_dummy(ftl: &'static str) -> Dummy { let langid_en = langid!("en-US"); - #[cfg(parallel_compiler)] let mut bundle = FluentBundle::new_concurrent(vec![langid_en]); - #[cfg(not(parallel_compiler))] - let mut bundle = FluentBundle::new(vec![langid_en]); - bundle.add_resource(resource).expect("Failed to add FTL resources to the bundle."); Dummy { bundle } diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs index be7fa9378ca66..3ed82947fe9af 100644 --- a/compiler/rustc_interface/src/interface.rs +++ b/compiler/rustc_interface/src/interface.rs @@ -58,6 +58,11 @@ impl Compiler { } } +#[allow(rustc::bad_opt_access)] +pub fn set_parallel_mode(sopts: &config::UnstableOptions) { + rustc_data_structures::sync::set(sopts.threads > 1); +} + /// Converts strings provided as `--cfg [cfgspec]` into a `crate_cfg`. pub fn parse_cfgspecs(cfgspecs: Vec) -> FxHashSet<(String, Option)> { rustc_span::create_default_session_if_not_set_then(move |_| { diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index 612903810d211..52719eaa967ad 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -126,12 +126,15 @@ fn get_stack_size() -> Option { env::var_os("RUST_MIN_STACK").is_none().then_some(STACK_SIZE) } -#[cfg(not(parallel_compiler))] pub(crate) fn run_in_thread_pool_with_globals R + Send, R: Send>( edition: Edition, _threads: usize, f: F, ) -> R { + #[cfg(parallel_compiler)] + if _threads > 1 { + return run_in_threads_pool_with_globals(edition, _threads, f); + } // The "thread pool" is a single spawned thread in the non-parallel // compiler. 
We run on a spawned thread instead of the main thread (a) to // provide control over the stack size, and (b) to increase similarity with @@ -161,7 +164,7 @@ pub(crate) fn run_in_thread_pool_with_globals R + Send, R: Send>( } #[cfg(parallel_compiler)] -pub(crate) fn run_in_thread_pool_with_globals R + Send, R: Send>( +pub(crate) fn run_in_threads_pool_with_globals R + Send, R: Send>( edition: Edition, threads: usize, f: F, diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index fd8e49efea03a..065e7c3bf5c77 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -8,7 +8,7 @@ use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::memmap::{Mmap, MmapMut}; use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher}; -use rustc_data_structures::sync::{join, par_iter, Lrc, ParallelIterator}; +use rustc_data_structures::sync::{join, par_for_each_in, Lrc}; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_hir as hir; use rustc_hir::def::DefKind; @@ -2125,7 +2125,7 @@ fn prefetch_mir(tcx: TyCtxt<'_>) { return; } - par_iter(tcx.mir_keys(())).for_each(|&def_id| { + par_for_each_in(tcx.mir_keys(()), |&def_id| { let (encode_const, encode_opt) = should_encode_mir(tcx, def_id); if encode_const { diff --git a/compiler/rustc_middle/Cargo.toml b/compiler/rustc_middle/Cargo.toml index dfbe8ac0ba3dc..323ec9a3a8fc9 100644 --- a/compiler/rustc_middle/Cargo.toml +++ b/compiler/rustc_middle/Cargo.toml @@ -27,7 +27,7 @@ rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } rustc_macros = { path = "../rustc_macros" } rustc_query_system = { path = "../rustc_query_system" } -rustc-rayon-core = { version = "0.5.0", optional = true } +rustc-rayon-core = { version = "0.5.0" } rustc-rayon = { version = "0.5.0", optional = true } rustc_serialize = { path = "../rustc_serialize" } rustc_session = { path = "../rustc_session" } @@ -39,4 +39,4 @@ thin-vec = "0.2.12" tracing = "0.1" [features] -rustc_use_parallel_compiler = ["rustc-rayon", "rustc-rayon-core"] +rustc_use_parallel_compiler = ["rustc-rayon"] diff --git a/compiler/rustc_middle/src/ty/context/tls.rs b/compiler/rustc_middle/src/ty/context/tls.rs index fb0d909307e78..6404efe3eca2a 100644 --- a/compiler/rustc_middle/src/ty/context/tls.rs +++ b/compiler/rustc_middle/src/ty/context/tls.rs @@ -4,8 +4,6 @@ use crate::dep_graph::TaskDepsRef; use crate::ty::query; use rustc_data_structures::sync::{self, Lock}; use rustc_errors::Diagnostic; -#[cfg(not(parallel_compiler))] -use std::cell::Cell; use std::mem; use std::ptr; use thin_vec::ThinVec; @@ -49,17 +47,8 @@ impl<'a, 'tcx> ImplicitCtxt<'a, 'tcx> { } } -// Import the thread-local variable from Rayon, which is preserved for Rayon jobs. -#[cfg(parallel_compiler)] use rayon_core::tlv::TLV; -// Otherwise define our own -#[cfg(not(parallel_compiler))] -thread_local! { - /// A thread local variable that stores a pointer to the current `ImplicitCtxt`. 
- static TLV: Cell<*const ()> = const { Cell::new(ptr::null()) }; -} - #[inline] fn erase(context: &ImplicitCtxt<'_, '_>) -> *const () { context as *const _ as *const () diff --git a/compiler/rustc_query_system/Cargo.toml b/compiler/rustc_query_system/Cargo.toml index 34e576379762b..308a36af1e556 100644 --- a/compiler/rustc_query_system/Cargo.toml +++ b/compiler/rustc_query_system/Cargo.toml @@ -16,7 +16,7 @@ rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } rustc_macros = { path = "../rustc_macros" } -rustc-rayon-core = { version = "0.5.0", optional = true } +rustc-rayon-core = { version = "0.5.0" } rustc_serialize = { path = "../rustc_serialize" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } @@ -27,4 +27,4 @@ thin-vec = "0.2.12" tracing = "0.1" [features] -rustc_use_parallel_compiler = ["rustc-rayon-core"] +rustc_use_parallel_compiler = [] diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index fd9e685ab8085..c27ca68cd306e 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -1159,6 +1159,12 @@ impl CurrentDepGraph { .get_or_alloc_cached_string("incr_comp_intern_dep_graph_node") .map(EventId::from_label); + let capacity = if rustc_data_structures::sync::active() { + new_node_count_estimate / sharded::SHARDS + } else { + new_node_count_estimate + }; + CurrentDepGraph { encoder: Steal::new(GraphEncoder::new( encoder, @@ -1167,10 +1173,7 @@ impl CurrentDepGraph { record_stats, )), new_node_to_index: Sharded::new(|| { - FxHashMap::with_capacity_and_hasher( - new_node_count_estimate / sharded::SHARDS, - Default::default(), - ) + FxHashMap::with_capacity_and_hasher(capacity, Default::default()) }), prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)), anon_id_seed, diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs index 29f6a07e81beb..ea6a75a6367e1 100644 --- a/compiler/rustc_query_system/src/query/caches.rs +++ b/compiler/rustc_query_system/src/query/caches.rs @@ -2,7 +2,6 @@ use crate::dep_graph::DepNodeIndex; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sharded; -#[cfg(parallel_compiler)] use rustc_data_structures::sharded::Sharded; use rustc_data_structures::sync::Lock; use rustc_index::vec::{Idx, IndexVec}; @@ -37,10 +36,7 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelecto } pub struct DefaultCache { - #[cfg(parallel_compiler)] cache: Sharded>, - #[cfg(not(parallel_compiler))] - cache: Lock>, } impl Default for DefaultCache { @@ -60,10 +56,8 @@ where #[inline(always)] fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> { let key_hash = sharded::make_hash(key); - #[cfg(parallel_compiler)] let lock = self.cache.get_shard_by_hash(key_hash).lock(); - #[cfg(not(parallel_compiler))] - let lock = self.cache.lock(); + let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key); if let Some((_, value)) = result { Some(*value) } else { None } @@ -71,29 +65,17 @@ where #[inline] fn complete(&self, key: K, value: V, index: DepNodeIndex) { - #[cfg(parallel_compiler)] let mut lock = self.cache.get_shard_by_value(&key).lock(); - #[cfg(not(parallel_compiler))] - let mut lock = self.cache.lock(); + // We may be overwriting another value. 
This is all right, since the dep-graph // will check that the fingerprint matches. lock.insert(key, (value, index)); } fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) { - #[cfg(parallel_compiler)] - { - let shards = self.cache.lock_shards(); - for shard in shards.iter() { - for (k, v) in shard.iter() { - f(k, &v.0, v.1); - } - } - } - #[cfg(not(parallel_compiler))] - { - let map = self.cache.lock(); - for (k, v) in map.iter() { + let shards = self.cache.lock_shards(); + for shard in shards.iter() { + for (k, v) in shard.iter() { f(k, &v.0, v.1); } } @@ -151,10 +133,7 @@ impl<'tcx, K: Idx, V: 'tcx> CacheSelector<'tcx, V> for VecCacheSelector { } pub struct VecCache { - #[cfg(parallel_compiler)] cache: Sharded>>, - #[cfg(not(parallel_compiler))] - cache: Lock>>, } impl Default for VecCache { @@ -173,38 +152,22 @@ where #[inline(always)] fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> { - #[cfg(parallel_compiler)] let lock = self.cache.get_shard_by_hash(key.index() as u64).lock(); - #[cfg(not(parallel_compiler))] - let lock = self.cache.lock(); + if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None } } #[inline] fn complete(&self, key: K, value: V, index: DepNodeIndex) { - #[cfg(parallel_compiler)] let mut lock = self.cache.get_shard_by_hash(key.index() as u64).lock(); - #[cfg(not(parallel_compiler))] - let mut lock = self.cache.lock(); + lock.insert(key, (value, index)); } fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) { - #[cfg(parallel_compiler)] - { - let shards = self.cache.lock_shards(); - for shard in shards.iter() { - for (k, v) in shard.iter_enumerated() { - if let Some(v) = v { - f(&k, &v.0, v.1); - } - } - } - } - #[cfg(not(parallel_compiler))] - { - let map = self.cache.lock(); - for (k, v) in map.iter_enumerated() { + let shards = self.cache.lock_shards(); + for shard in shards.iter() { + for (k, v) in shard.iter_enumerated() { if let Some(v) = v { f(&k, &v.0, v.1); } diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs index a534b54070cd0..6789f2f5f0976 100644 --- a/compiler/rustc_query_system/src/query/job.rs +++ b/compiler/rustc_query_system/src/query/job.rs @@ -15,7 +15,6 @@ use rustc_span::Span; use std::hash::Hash; use std::num::NonZeroU64; -#[cfg(parallel_compiler)] use { parking_lot::{Condvar, Mutex}, rayon_core, @@ -47,17 +46,14 @@ impl QueryJobId { map.get(&self).unwrap().query.clone() } - #[cfg(parallel_compiler)] fn span(self, map: &QueryMap) -> Span { map.get(&self).unwrap().job.span } - #[cfg(parallel_compiler)] fn parent(self, map: &QueryMap) -> Option { map.get(&self).unwrap().job.parent } - #[cfg(parallel_compiler)] fn latch(self, map: &QueryMap) -> Option<&QueryLatch> { map.get(&self).unwrap().job.latch.as_ref() } @@ -81,7 +77,6 @@ pub struct QueryJob { pub parent: Option, /// The latch that is used to wait on this job. - #[cfg(parallel_compiler)] latch: Option>, spooky: core::marker::PhantomData, } @@ -90,17 +85,9 @@ impl QueryJob { /// Creates a new query job. 
#[inline] pub fn new(id: QueryJobId, span: Span, parent: Option) -> Self { - QueryJob { - id, - span, - parent, - #[cfg(parallel_compiler)] - latch: None, - spooky: PhantomData, - } + QueryJob { id, span, parent, latch: None, spooky: PhantomData } } - #[cfg(parallel_compiler)] pub(super) fn latch(&mut self) -> QueryLatch { if self.latch.is_none() { self.latch = Some(QueryLatch::new()); @@ -114,17 +101,13 @@ impl QueryJob { /// as there are no concurrent jobs which could be waiting on us #[inline] pub fn signal_complete(self) { - #[cfg(parallel_compiler)] - { - if let Some(latch) = self.latch { - latch.set(); - } + if let Some(latch) = self.latch { + latch.set(); } } } impl QueryJobId { - #[cfg(not(parallel_compiler))] pub(super) fn find_cycle_in_stack( &self, query_map: QueryMap, @@ -185,7 +168,6 @@ impl QueryJobId { } } -#[cfg(parallel_compiler)] struct QueryWaiter { query: Option, condvar: Condvar, @@ -193,7 +175,6 @@ struct QueryWaiter { cycle: Lock>>, } -#[cfg(parallel_compiler)] impl QueryWaiter { fn notify(&self, registry: &rayon_core::Registry) { rayon_core::mark_unblocked(registry); @@ -201,19 +182,16 @@ impl QueryWaiter { } } -#[cfg(parallel_compiler)] struct QueryLatchInfo { complete: bool, waiters: Vec>>, } -#[cfg(parallel_compiler)] #[derive(Clone)] pub(super) struct QueryLatch { info: Lrc>>, } -#[cfg(parallel_compiler)] impl QueryLatch { fn new() -> Self { QueryLatch { @@ -284,7 +262,6 @@ impl QueryLatch { } /// A resumable waiter of a query. The usize is the index into waiters in the query's latch -#[cfg(parallel_compiler)] type Waiter = (QueryJobId, usize); /// Visits all the non-resumable and resumable waiters of a query. @@ -296,7 +273,6 @@ type Waiter = (QueryJobId, usize); /// For visits of resumable waiters it returns Some(Some(Waiter)) which has the /// required information to resume the waiter. /// If all `visit` calls returns None, this function also returns None. -#[cfg(parallel_compiler)] fn visit_waiters( query_map: &QueryMap, query: QueryJobId, @@ -332,7 +308,6 @@ where /// `span` is the reason for the `query` to execute. This is initially DUMMY_SP. /// If a cycle is detected, this initial value is replaced with the span causing /// the cycle. -#[cfg(parallel_compiler)] fn cycle_check( query_map: &QueryMap, query: QueryJobId, @@ -373,7 +348,6 @@ fn cycle_check( /// Finds out if there's a path to the compiler root (aka. code which isn't in a query) /// from `query` without going through any of the queries in `visited`. /// This is achieved with a depth first search. -#[cfg(parallel_compiler)] fn connected_to_root( query_map: &QueryMap, query: QueryJobId, @@ -396,7 +370,6 @@ fn connected_to_root( } // Deterministically pick an query from a list -#[cfg(parallel_compiler)] fn pick_query<'a, T, F, D>(query_map: &QueryMap, queries: &'a [T], f: F) -> &'a T where F: Fn(&T) -> (Span, QueryJobId), @@ -423,7 +396,6 @@ where /// the function return true. /// If a cycle was not found, the starting query is removed from `jobs` and /// the function returns false. -#[cfg(parallel_compiler)] fn remove_cycle( query_map: &QueryMap, jobs: &mut Vec, @@ -528,7 +500,6 @@ fn remove_cycle( /// uses a query latch and then resuming that waiter. /// There may be multiple cycles involved in a deadlock, so this searches /// all active queries for cycles before finally resuming all the waiters at once. 
-#[cfg(parallel_compiler)] pub fn deadlock(query_map: QueryMap, registry: &rayon_core::Registry) { let on_panic = OnDrop(|| { eprintln!("deadlock handler panicked, aborting process"); diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs index fa1f51b04da77..8abf8501b2ea1 100644 --- a/compiler/rustc_query_system/src/query/mod.rs +++ b/compiler/rustc_query_system/src/query/mod.rs @@ -2,7 +2,6 @@ mod plumbing; pub use self::plumbing::*; mod job; -#[cfg(parallel_compiler)] pub use self::job::deadlock; pub use self::job::{print_query_stack, QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryMap}; @@ -37,7 +36,6 @@ pub struct QueryStackFrame { pub dep_kind: D, /// This hash is used to deterministically pick /// a query to remove cycles in the parallel compiler. - #[cfg(parallel_compiler)] hash: Hash64, } @@ -52,16 +50,7 @@ impl QueryStackFrame { ty_adt_id: Option, _hash: impl FnOnce() -> Hash64, ) -> Self { - Self { - description, - span, - def_id, - def_kind, - ty_adt_id, - dep_kind, - #[cfg(parallel_compiler)] - hash: _hash(), - } + Self { description, span, def_id, def_kind, ty_adt_id, dep_kind, hash: _hash() } } // FIXME(eddyb) Get more valid `Span`s on queries. diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs index bce01debc53b0..0bc35a6265dc4 100644 --- a/compiler/rustc_query_system/src/query/plumbing.rs +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -6,18 +6,17 @@ use crate::dep_graph::{DepContext, DepKind, DepNode, DepNodeIndex, DepNodeParams use crate::dep_graph::{DepGraphData, HasDepContext}; use crate::ich::StableHashingContext; use crate::query::caches::QueryCache; -#[cfg(parallel_compiler)] use crate::query::job::QueryLatch; use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobInfo}; use crate::query::SerializedDepNodeIndex; use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame}; use crate::HandleCycleError; +use rustc_data_structures::cold_path; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sharded::Sharded; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_data_structures::sync::Lock; -#[cfg(parallel_compiler)] -use rustc_data_structures::{cold_path, sharded::Sharded}; use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError}; use rustc_span::{Span, DUMMY_SP}; use std::cell::Cell; @@ -30,10 +29,7 @@ use thin_vec::ThinVec; use super::QueryConfig; pub struct QueryState { - #[cfg(parallel_compiler)] active: Sharded>>, - #[cfg(not(parallel_compiler))] - active: Lock>>, } /// Indicates the state of a query for a given key in a query map. @@ -52,15 +48,8 @@ where D: DepKind, { pub fn all_inactive(&self) -> bool { - #[cfg(parallel_compiler)] - { - let shards = self.active.lock_shards(); - shards.iter().all(|shard| shard.is_empty()) - } - #[cfg(not(parallel_compiler))] - { - self.active.lock().is_empty() - } + let shards = self.active.lock_shards(); + shards.iter().all(|shard| shard.is_empty()) } pub fn try_collect_active_jobs( @@ -69,27 +58,11 @@ where make_query: fn(Qcx, K) -> QueryStackFrame, jobs: &mut QueryMap, ) -> Option<()> { - #[cfg(parallel_compiler)] - { - // We use try_lock_shards here since we are called from the - // deadlock handler, and this shouldn't be locked. 
- let shards = self.active.try_lock_shards()?; - for shard in shards.iter() { - for (k, v) in shard.iter() { - if let QueryResult::Started(ref job) = *v { - let query = make_query(qcx, *k); - jobs.insert(job.id, QueryJobInfo { query, job: job.clone() }); - } - } - } - } - #[cfg(not(parallel_compiler))] - { - // We use try_lock here since we are called from the - // deadlock handler, and this shouldn't be locked. - // (FIXME: Is this relevant for non-parallel compilers? It doesn't - // really hurt much.) - for (k, v) in self.active.try_lock()?.iter() { + // We use try_lock_shards here since we are called from the + // deadlock handler, and this shouldn't be locked. + let shards = self.active.try_lock_shards()?; + for shard in shards.iter() { + for (k, v) in shard.iter() { if let QueryResult::Started(ref job) = *v { let query = make_query(qcx, *k); jobs.insert(job.id, QueryJobInfo { query, job: job.clone() }); @@ -183,10 +156,8 @@ where cache.complete(key, result, dep_node_index); let job = { - #[cfg(parallel_compiler)] let mut lock = state.active.get_shard_by_value(&key).lock(); - #[cfg(not(parallel_compiler))] - let mut lock = state.active.lock(); + match lock.remove(&key).unwrap() { QueryResult::Started(job) => job, QueryResult::Poisoned => panic!(), @@ -208,10 +179,8 @@ where // Poison the query so jobs waiting on it panic. let state = self.state; let job = { - #[cfg(parallel_compiler)] let mut shard = state.active.get_shard_by_value(&self.key).lock(); - #[cfg(not(parallel_compiler))] - let mut shard = state.active.lock(); + let job = match shard.remove(&self.key).unwrap() { QueryResult::Started(job) => job, QueryResult::Poisoned => panic!(), @@ -254,7 +223,6 @@ where #[cold] #[inline(never)] -#[cfg(not(parallel_compiler))] fn cycle_error( query: Q, qcx: Qcx, @@ -274,7 +242,6 @@ where } #[inline(always)] -#[cfg(parallel_compiler)] fn wait_for_query( query: Q, qcx: Qcx, @@ -324,18 +291,14 @@ where Qcx: QueryContext, { let state = query.query_state(qcx); - #[cfg(parallel_compiler)] let mut state_lock = state.active.get_shard_by_value(&key).lock(); - #[cfg(not(parallel_compiler))] - let mut state_lock = state.active.lock(); - // For the parallel compiler we need to check both the query cache and query state structures // while holding the state lock to ensure that 1) the query has not yet completed and 2) the // query is not still executing. Without checking the query cache here, we can end up // re-executing the query since `try_start` only checks that the query is not currently // executing, but another thread may have already completed the query and stores it result // in the query cache. - if cfg!(parallel_compiler) && qcx.dep_context().sess().threads() > 1 { + if qcx.dep_context().sess().threads() > 1 { if let Some((value, index)) = query.query_cache(qcx).lookup(&key) { qcx.dep_context().profiler().query_cache_hit(index.into()); return (value, Some(index)); @@ -359,17 +322,15 @@ where } Entry::Occupied(mut entry) => { match entry.get_mut() { - #[cfg(not(parallel_compiler))] QueryResult::Started(job) => { - let id = job.id; - drop(state_lock); + if !rustc_data_structures::sync::active() { + let id = job.id; + drop(state_lock); - // If we are single-threaded we know that we have cycle error, - // so we just return the error. - cycle_error(query, qcx, id, span) - } - #[cfg(parallel_compiler)] - QueryResult::Started(job) => { + // If we are single-threaded we know that we have cycle error, + // so we just return the error. 
+                        return cycle_error(query, qcx, id, span);
+                    }
                     // Get the latch out
                     let latch = job.latch();
                     drop(state_lock);
diff --git a/src/librustdoc/clean/utils.rs b/src/librustdoc/clean/utils.rs
index b802fd065fe54..aa456921c8f2f 100644
--- a/src/librustdoc/clean/utils.rs
+++ b/src/librustdoc/clean/utils.rs
@@ -470,12 +470,6 @@ pub(crate) fn get_auto_trait_and_blanket_impls(
     cx: &mut DocContext<'_>,
     item_def_id: DefId,
 ) -> impl Iterator<Item = Item> {
-    // FIXME: To be removed once `parallel_compiler` bugs are fixed!
-    // More information in .
-    if cfg!(parallel_compiler) {
-        return vec![].into_iter().chain(vec![].into_iter());
-    }
-
     let auto_impls = cx
         .sess()
         .prof
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
index 4a88dc5254de6..48ff9f1c30eb4 100644
--- a/src/librustdoc/lib.rs
+++ b/src/librustdoc/lib.rs
@@ -728,6 +728,9 @@ fn main_args(at_args: &[String]) -> MainResult {
         }
     };
 
+    // Set parallel mode early as the error handler will already create locks.
+    interface::set_parallel_mode(&options.unstable_opts);
+
     let diag = core::new_handler(
         options.error_format,
         None,
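
Note on the overall mechanism, for reviewers: instead of building two different compilers via `cfg(parallel_compiler)`, this patch keeps one binary and consults a process-global mode flag that `rustc_interface::set_parallel_mode` sets exactly once from `-Z threads`, before the error handler creates any locks. The synchronization primitives then branch on that flag at runtime. The sketch below is a minimal, self-contained illustration of that pattern, assuming only the `parking_lot` crate; it is not the rustc code itself, and it omits `try_lock`, `ERROR_CHECKING`, the `likely` hint, and the `Sharded` wrapper that the real `rustc_data_structures::sync::Lock` carries.

```rust
// Minimal sketch (assumes the `parking_lot` crate): a global parallel-mode flag
// plus a Lock that is a borrow flag when single-threaded and a mutex otherwise.
use std::cell::{Cell, UnsafeCell};
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicU8, Ordering};

use parking_lot::lock_api::RawMutex as _;
use parking_lot::RawMutex;

const UNINITIALIZED: u8 = 0;
const INACTIVE: u8 = 1;
const ACTIVE: u8 = 2;

static MODE: AtomicU8 = AtomicU8::new(UNINITIALIZED);

/// Set once at startup; in the patch this happens from `-Z threads`.
pub fn set(parallel: bool) {
    let new = if parallel { ACTIVE } else { INACTIVE };
    let prev = MODE.compare_exchange(UNINITIALIZED, new, Ordering::Relaxed, Ordering::Relaxed);
    // Repeated calls are fine only if they request the same mode.
    assert!(prev.is_ok() || prev == Err(new));
}

pub fn active() -> bool {
    MODE.load(Ordering::Relaxed) == ACTIVE
}

pub struct Lock<T> {
    single_thread: bool,
    borrow: Cell<bool>, // used only on the single-threaded path
    mutex: RawMutex,    // used only on the multi-threaded path
    data: UnsafeCell<T>,
}

// SAFETY (sketch-level, mirroring the patch): a Lock created while the mode is
// single-threaded must never actually be shared across threads.
unsafe impl<T: Send> Send for Lock<T> {}
unsafe impl<T: Send> Sync for Lock<T> {}

impl<T> Lock<T> {
    pub fn new(value: T) -> Self {
        Lock {
            single_thread: !active(),
            borrow: Cell::new(false),
            mutex: RawMutex::INIT,
            data: UnsafeCell::new(value),
        }
    }

    pub fn lock(&self) -> LockGuard<'_, T> {
        if self.single_thread {
            // Cheap path: a re-entrancy check, no atomics.
            assert!(!self.borrow.replace(true), "lock already held");
        } else {
            self.mutex.lock();
        }
        LockGuard { lock: self }
    }
}

pub struct LockGuard<'a, T> {
    lock: &'a Lock<T>,
}

impl<T> Deref for LockGuard<'_, T> {
    type Target = T;
    fn deref(&self) -> &T {
        // SAFETY: the guard proves exclusive access until it is dropped.
        unsafe { &*self.lock.data.get() }
    }
}

impl<T> DerefMut for LockGuard<'_, T> {
    fn deref_mut(&mut self) -> &mut T {
        unsafe { &mut *self.lock.data.get() }
    }
}

impl<'a, T> Drop for LockGuard<'a, T> {
    fn drop(&mut self) {
        if self.lock.single_thread {
            self.lock.borrow.set(false);
        } else {
            // SAFETY: this guard was created by `lock`, which acquired the mutex.
            unsafe { self.lock.mutex.unlock() }
        }
    }
}

fn main() {
    set(false); // what rustc does for -Z threads=1
    let counter = Lock::new(0u32);
    *counter.lock() += 1;
    assert_eq!(*counter.lock(), 1);
}
```

The point of the `single_thread` fast path is that `-Z threads=1` pays only a `Cell` read/write per lock instead of an atomic RMW, which is why the patch guards the hot paths with `likely(self.single_thread)` and pushes the mutex paths out of line with `#[inline(never)]`.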
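
The iterator helpers (`par_map`, `par_for_each_in`) follow the same shape: one generic entry point that dispatches to rayon only when the mode flag is active, and otherwise runs a plain serial loop with the same panic-collection behavior. Below is a concrete, simplified example of that dispatch shape only; the function name and the explicit `parallel` parameter are illustrative, whereas in the patch the decision comes from `sync::active()`, the helpers are generic, and closures are wrapped in `FromDyn` to recover `Send`/`Sync` bounds.

```rust
// Sketch of the runtime serial/parallel dispatch used by `sync::par_map`.
// Assumes the `rayon` crate; panic collection from the real helper is omitted.
use rayon::prelude::*;

fn par_map_squares(input: Vec<u64>, parallel: bool) -> Vec<u64> {
    if parallel {
        // Parallel path: work is distributed over rayon's thread pool.
        input.into_par_iter().map(|x| x * x).collect()
    } else {
        // Serial path: no synchronization overhead when -Z threads=1.
        input.into_iter().map(|x| x * x).collect()
    }
}

fn main() {
    assert_eq!(par_map_squares(vec![1, 2, 3], true), vec![1, 4, 9]);
    assert_eq!(par_map_squares(vec![1, 2, 3], false), vec![1, 4, 9]);
}
```

Both paths visit every element and produce the same result; only the execution order differs between the two modes.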