diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index 630832372704a..0efd24ae132f0 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -383,8 +383,13 @@ top_level_options!( // try to not rely on this too much. actually_rustdoc: bool [TRACKED], - // Number of object files/codegen units to produce on the backend + // Specifications of codegen units / ThinLTO which are forced as a + // result of parsing command line options. These are not necessarily + // what rustc was invoked with, but massaged a bit to agree with + // commands like `--emit llvm-ir` which they're often incompatible with + // if we otherwise use the defaults of rustc. cli_forced_codegen_units: Option [UNTRACKED], + cli_forced_thinlto: Option [UNTRACKED], } ); @@ -566,6 +571,7 @@ pub fn basic_options() -> Options { debug_assertions: true, actually_rustdoc: false, cli_forced_codegen_units: None, + cli_forced_thinlto: None, } } @@ -1165,7 +1171,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options, "run the non-lexical lifetimes MIR pass"), trans_time_graph: bool = (false, parse_bool, [UNTRACKED], "generate a graphical HTML report of time spent in trans and LLVM"), - thinlto: bool = (false, parse_bool, [TRACKED], + thinlto: Option = (None, parse_opt_bool, [TRACKED], "enable ThinLTO when possible"), inline_in_all_cgus: Option = (None, parse_opt_bool, [TRACKED], "control whether #[inline] functions are in all cgus"), @@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) let mut cg = build_codegen_options(matches, error_format); let mut codegen_units = cg.codegen_units; + let mut thinlto = None; // Issue #30063: if user requests llvm-related output to one // particular path, disable codegen-units. 
@@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) } early_warn(error_format, "resetting to default -C codegen-units=1"); codegen_units = Some(1); + thinlto = Some(false); } } - _ => codegen_units = Some(1), + _ => { + codegen_units = Some(1); + thinlto = Some(false); + } } } @@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) debug_assertions, actually_rustdoc: false, cli_forced_codegen_units: codegen_units, + cli_forced_thinlto: thinlto, }, cfg) } diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index 227efcf4d6e21..df5805bacd41a 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -656,30 +656,91 @@ impl Session { return n as usize } + // Why is 16 codegen units the default all the time? + // + // The main reason for enabling multiple codegen units by default is to + // leverage the ability for the trans backend to do translation and + // codegen in parallel. This allows us, especially for large crates, to + // make good use of all available resources on the machine once we've + // hit that stage of compilation. Large crates especially then often + // take a long time in trans/codegen and this helps us amortize that + // cost. + // + // Note that a high number here doesn't mean that we'll be spawning a + // large number of threads in parallel. The backend of rustc contains + // global rate limiting through the `jobserver` crate so we'll never + // overload the system with too much work, but rather we'll only be + // optimizing when we're otherwise cooperating with other instances of + // rustc. + // + // Rather a high number here means that we should be able to keep a lot + // of idle cpus busy. 
By ensuring that no codegen unit takes *too* long + // to build we'll be guaranteed that all cpus will finish pretty closely + // to one another and we should make relatively optimal use of system + // resources. + // + // Note that the main cost of codegen units is that it prevents LLVM + // from inlining across codegen units. Users in general don't have a lot + // of control over how codegen units are split up so it's our job in the + // compiler to ensure that undue performance isn't lost when using + // codegen units (i.e. we can't require everyone to slap `#[inline]` on + // everything). + // + // If we're compiling at `-O0` then the number doesn't really matter too + // much because performance doesn't matter and inlining is ok to lose. + // In debug mode we just want to try to guarantee that no cpu is stuck + // doing work that could otherwise be farmed to others. + // + // In release mode, however (O1 and above) performance does indeed + // matter! To recover the performance lost to missed inlining we'll be + // enabling ThinLTO by default (the function for which is just below). + // This will ensure that we recover any inlining wins we otherwise lost + // through codegen unit partitioning. + // + // --- + // + // Ok that's a lot of words but the basic tl;dr is that we want a high + // number here -- but not too high. Additionally we're "safe" to have it + // always at the same number at all optimization levels. + // + // As a result 16 was chosen here! Mostly because it was a power of 2 + // and most benchmarks agreed it was roughly a local optimum. Not very + // scientific. match self.opts.optimize { - // If we're compiling at `-O0` then default to 16 codegen units. - // The number here shouldn't matter too too much as debug mode - // builds don't rely on performance at all, meaning that lost - // opportunities for inlining through multiple codegen units is - // a non-issue. 
- // - // Note that the high number here doesn't mean that we'll be - // spawning a large number of threads in parallel. The backend - // of rustc contains global rate limiting through the - // `jobserver` crate so we'll never overload the system with too - // much work, but rather we'll only be optimizing when we're - // otherwise cooperating with other instances of rustc. - // - // Rather the high number here means that we should be able to - // keep a lot of idle cpus busy. By ensuring that no codegen - // unit takes *too* long to build we'll be guaranteed that all - // cpus will finish pretty closely to one another and we should - // make relatively optimal use of system resources config::OptLevel::No => 16, + _ => 1, // FIXME(#46346) this should be 16 + } + } - // All other optimization levels default use one codegen unit, - // the historical default in Rust for a Long Time. - _ => 1, + /// Returns whether ThinLTO is enabled for this compilation + pub fn thinlto(&self) -> bool { + // If processing command line options determined that we're incompatible + // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option. + if let Some(enabled) = self.opts.cli_forced_thinlto { + return enabled + } + + // If explicitly specified, use that with the next highest priority + if let Some(enabled) = self.opts.debugging_opts.thinlto { + return enabled + } + + // If there's only one codegen unit and LTO isn't enabled then there's + // no need for ThinLTO so just return false. + if self.codegen_units() == 1 && !self.lto() { + return false + } + + // Right now ThinLTO isn't compatible with incremental compilation. + if self.opts.incremental.is_some() { + return false + } + + // Now we're in "defaults" territory. By default we enable ThinLTO for + // optimized compiles (anything greater than O0). 
+ match self.opts.optimize { + config::OptLevel::No => false, + _ => true, } } } diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index da67940abcb77..cb883e0349f31 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt, // for doesn't require full LTO. Some targets require one LLVM module // (they effectively don't have a linker) so it's up to us to use LTO to // link everything together. - thinlto: sess.opts.debugging_opts.thinlto && - !sess.target.target.options.requires_lto, + thinlto: sess.thinlto() && + !sess.target.target.options.requires_lto && + unsafe { llvm::LLVMRustThinLTOAvailable() }, no_landing_pads: sess.no_landing_pads(), save_temps: sess.opts.cg.save_temps, diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index b7408681ed0c8..03c1e4368c9a3 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, check_for_rustc_errors_attr(tcx); - if tcx.sess.opts.debugging_opts.thinlto { + if let Some(true) = tcx.sess.opts.debugging_opts.thinlto { if unsafe { !llvm::LLVMRustThinLTOAvailable() } { tcx.sess.fatal("this compiler's LLVM does not support ThinLTO"); } diff --git a/src/libstd/sys_common/backtrace.rs b/src/libstd/sys_common/backtrace.rs index 9f0214f5f0510..b5cf6d7d34fcc 100644 --- a/src/libstd/sys_common/backtrace.rs +++ b/src/libstd/sys_common/backtrace.rs @@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write, // Note that this demangler isn't quite as fancy as it could be. We have lots // of other information in our symbols like hashes, version, type information, // etc. Additionally, this doesn't handle glue symbols at all. -pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> { - // First validate the symbol. 
If it doesn't look like anything we're +pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> { + // During ThinLTO LLVM may import and rename internal symbols, so strip out + // those endings first as they're one of the last manglings applied to + // symbol names. + let llvm = ".llvm."; + if let Some(i) = s.find(llvm) { + let candidate = &s[i + llvm.len()..]; + let all_hex = candidate.chars().all(|c| { + match c { + 'A' ... 'F' | '0' ... '9' => true, + _ => false, + } + }); + + if all_hex { + s = &s[..i]; + } + } + + // Validate the symbol. If it doesn't look like anything we're // expecting, we just print it literally. Note that we must handle non-rust // symbols because we could have any function in the backtrace. let mut valid = true; diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp index 4a359fb3ad306..1e52ad571b8ec 100644 --- a/src/rustllvm/PassWrapper.cpp +++ b/src/rustllvm/PassWrapper.cpp @@ -11,6 +11,7 @@ #include #include +#include #include "rustllvm.h" @@ -885,86 +886,6 @@ getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { return FirstDefForLinker->get(); } -// This is a helper function we added that isn't present in LLVM's source. -// -// The way LTO works in Rust is that we typically have a number of symbols that -// we know ahead of time need to be preserved. We want to ensure that ThinLTO -// doesn't accidentally internalize any of these and otherwise is always -// ready to keep them linking correctly. -// -// This function will recursively walk the `GUID` provided and all of its -// references, as specified in the `Index`. In other words, we're taking a -// `GUID` as input, adding it to `Preserved`, and then taking all `GUID` -// items that the input references and recursing. 
-static void -addPreservedGUID(const ModuleSummaryIndex &Index, - DenseSet &Preserved, - GlobalValue::GUID GUID) { - if (Preserved.count(GUID)) - return; - Preserved.insert(GUID); - -#if LLVM_VERSION_GE(5, 0) - auto Info = Index.getValueInfo(GUID); - if (!Info) { - return; - } - for (auto &Summary : Info.getSummaryList()) { - for (auto &Ref : Summary->refs()) { - addPreservedGUID(Index, Preserved, Ref.getGUID()); - } - - GlobalValueSummary *GVSummary = Summary.get(); - if (isa(GVSummary)) { - auto *FS = cast(GVSummary); - for (auto &Call: FS->calls()) { - addPreservedGUID(Index, Preserved, Call.first.getGUID()); - } - for (auto &GUID: FS->type_tests()) { - addPreservedGUID(Index, Preserved, GUID); - } - } - if (isa(GVSummary)) { - auto *AS = cast(GVSummary); - auto GUID = AS->getAliasee().getOriginalName(); - addPreservedGUID(Index, Preserved, GUID); - } - } -#else - auto SummaryList = Index.findGlobalValueSummaryList(GUID); - if (SummaryList == Index.end()) - return; - for (auto &Summary : SummaryList->second) { - for (auto &Ref : Summary->refs()) { - if (Ref.isGUID()) { - addPreservedGUID(Index, Preserved, Ref.getGUID()); - } else { - auto Value = Ref.getValue(); - addPreservedGUID(Index, Preserved, Value->getGUID()); - } - } - - if (auto *FS = dyn_cast(Summary.get())) { - for (auto &Call: FS->calls()) { - if (Call.first.isGUID()) { - addPreservedGUID(Index, Preserved, Call.first.getGUID()); - } else { - auto Value = Call.first.getValue(); - addPreservedGUID(Index, Preserved, Value->getGUID()); - } - } - for (auto &GUID: FS->type_tests()) { - addPreservedGUID(Index, Preserved, GUID); - } - } - if (auto *AS = dyn_cast(Summary.get())) { - auto GUID = AS->getAliasee().getOriginalName(); - addPreservedGUID(Index, Preserved, GUID); - } - } -#endif -} - // The main entry point for creating the global ThinLTO analysis. The structure // here is basically the same as before threads are spawned in the `run` // function of `lib/LTO/ThinLTOCodeGenerator.cpp`. 
@@ -1004,12 +925,10 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, Ret->Index.collectDefinedGVSummariesPerModule(Ret->ModuleToDefinedGVSummaries); // Convert the preserved symbols set from string to GUID, this is then needed - // for internalization. We use `addPreservedGUID` to include any transitively - // used symbol as well. + // for internalization. for (int i = 0; i < num_symbols; i++) { - addPreservedGUID(Ret->Index, - Ret->GUIDPreservedSymbols, - GlobalValue::getGUID(preserved_symbols[i])); + auto GUID = GlobalValue::getGUID(preserved_symbols[i]); + Ret->GUIDPreservedSymbols.insert(GUID); } // Collect the import/export lists for all modules from the call-graph in the @@ -1038,7 +957,8 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, // Resolve LinkOnce/Weak symbols, this has to be computed early be cause it // impacts the caching. // - // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp` + // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp` with some of this + // being lifted from `lib/LTO/LTO.cpp` as well StringMap> ResolvedODR; DenseMap PrevailingCopy; for (auto &I : Ret->Index) { @@ -1062,11 +982,27 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; }; thinLTOResolveWeakForLinkerInIndex(Ret->Index, isPrevailing, recordNewLinkage); + + // Here we calculate an `ExportedGUIDs` set for use in the `isExported` + // callback below. This callback below will dictate the linkage for all + // summaries in the index, and we basically just only want to ensure that dead + // symbols are internalized. Otherwise everything that's already external + // linkage will stay as external, and internal will stay as internal. 
+ std::set ExportedGUIDs; + for (auto &List : Ret->Index) { + for (auto &GVS: List.second) { + if (!GlobalValue::isExternalLinkage(GVS->linkage())) + continue; + auto GUID = GVS->getOriginalName(); + if (!DeadSymbols.count(GUID)) + ExportedGUIDs.insert(GUID); + } + } auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = Ret->ExportLists.find(ModuleIdentifier); return (ExportList != Ret->ExportLists.end() && ExportList->second.count(GUID)) || - Ret->GUIDPreservedSymbols.count(GUID); + ExportedGUIDs.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(Ret->Index, isExported); diff --git a/src/test/run-fail/mir_trans_no_landing_pads.rs b/src/test/run-fail/mir_trans_no_landing_pads.rs index dacb039d89dc5..bafb78fc213e3 100644 --- a/src/test/run-fail/mir_trans_no_landing_pads.rs +++ b/src/test/run-fail/mir_trans_no_landing_pads.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // error-pattern:converging_fn called use std::io::{self, Write}; diff --git a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs index 87037c1efed9e..998ee7470bbe9 100644 --- a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs +++ b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // error-pattern:diverging_fn called use std::io::{self, Write}; diff --git a/src/test/run-pass-fulldeps/auxiliary/issue_16723_multiple_items_syntax_ext.rs b/src/test/run-pass-fulldeps/auxiliary/issue-16723.rs similarity index 100% rename from src/test/run-pass-fulldeps/auxiliary/issue_16723_multiple_items_syntax_ext.rs rename to src/test/run-pass-fulldeps/auxiliary/issue-16723.rs diff --git a/src/test/run-pass-fulldeps/auxiliary/plugin_crate_outlive_expansion_phase.rs b/src/test/run-pass-fulldeps/auxiliary/outlive-expansion-phase.rs similarity index 100% rename from src/test/run-pass-fulldeps/auxiliary/plugin_crate_outlive_expansion_phase.rs rename to src/test/run-pass-fulldeps/auxiliary/outlive-expansion-phase.rs diff --git a/src/test/run-pass-fulldeps/issue_16723_multiple_items_syntax_ext.rs b/src/test/run-pass-fulldeps/issue-16723.rs similarity index 86% rename from src/test/run-pass-fulldeps/issue_16723_multiple_items_syntax_ext.rs rename to src/test/run-pass-fulldeps/issue-16723.rs index c1ffeb7c8e2e9..bacfa8d1ead5a 100644 --- a/src/test/run-pass-fulldeps/issue_16723_multiple_items_syntax_ext.rs +++ b/src/test/run-pass-fulldeps/issue-16723.rs @@ -9,9 +9,9 @@ // except according to those terms. 
// ignore-stage1 -// aux-build:issue_16723_multiple_items_syntax_ext.rs +// aux-build:issue-16723.rs #![feature(plugin)] -#![plugin(issue_16723_multiple_items_syntax_ext)] +#![plugin(issue_16723)] multiple_items!(); diff --git a/src/test/run-pass-fulldeps/macro-crate-outlive-expansion-phase.rs b/src/test/run-pass-fulldeps/outlive-expansion-phase.rs similarity index 83% rename from src/test/run-pass-fulldeps/macro-crate-outlive-expansion-phase.rs rename to src/test/run-pass-fulldeps/outlive-expansion-phase.rs index 9573d0c8c4030..6eb3e510724f3 100644 --- a/src/test/run-pass-fulldeps/macro-crate-outlive-expansion-phase.rs +++ b/src/test/run-pass-fulldeps/outlive-expansion-phase.rs @@ -8,10 +8,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// aux-build:plugin_crate_outlive_expansion_phase.rs +// aux-build:outlive-expansion-phase.rs // ignore-stage1 #![feature(plugin)] -#![plugin(plugin_crate_outlive_expansion_phase)] +#![plugin(outlive_expansion_phase)] pub fn main() {} diff --git a/src/test/run-pass/no-landing-pads.rs b/src/test/run-pass/no-landing-pads.rs index e718046ebbcd7..73f123045d249 100644 --- a/src/test/run-pass/no-landing-pads.rs +++ b/src/test/run-pass/no-landing-pads.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // ignore-emscripten no threads support use std::thread; diff --git a/src/test/run-pass/specialization/auxiliary/specialization_cross_crate_defaults.rs b/src/test/run-pass/specialization/auxiliary/cross_crates_defaults.rs similarity index 100% rename from src/test/run-pass/specialization/auxiliary/specialization_cross_crate_defaults.rs rename to src/test/run-pass/specialization/auxiliary/cross_crates_defaults.rs diff --git a/src/test/run-pass/specialization/specialization-cross-crate-defaults.rs b/src/test/run-pass/specialization/cross-crate-defaults.rs similarity index 87% rename from src/test/run-pass/specialization/specialization-cross-crate-defaults.rs rename to src/test/run-pass/specialization/cross-crate-defaults.rs index 62c7e3e2e4431..132520dcb736e 100644 --- a/src/test/run-pass/specialization/specialization-cross-crate-defaults.rs +++ b/src/test/run-pass/specialization/cross-crate-defaults.rs @@ -8,13 +8,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// aux-build:specialization_cross_crate_defaults.rs +// aux-build:cross_crates_defaults.rs #![feature(specialization)] -extern crate specialization_cross_crate_defaults; +extern crate cross_crates_defaults; -use specialization_cross_crate_defaults::*; +use cross_crates_defaults::*; struct LocalDefault; struct LocalOverride;