Skip to content

Commit

Permalink
Auto merge of rust-lang#133250 - DianQK:embed-bitcode-pgo, r=<try>
Browse files Browse the repository at this point in the history
The embedded bitcode should always be prepared for LTO/ThinLTO

Fixes rust-lang#115344. Fixes rust-lang#117220.

There are currently two methods for generating the bitcode that is used for LTO. One method is to use `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is `-C embed-bitcode=yes`.

When using `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain the bitcode, and that bitcode is then used for LTO. As a result, we run the Call Graph Profile Pass twice on the same module.

This PR is doing something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`.

r? nikic
  • Loading branch information
bors committed Dec 1, 2024
2 parents 5e1440a + e4d3be7 commit 99acc6a
Show file tree
Hide file tree
Showing 13 changed files with 280 additions and 71 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_cranelift/src/driver/aot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ fn produce_final_output_artifacts(
// to get rid of it.
for output_type in crate_output.outputs.keys() {
match *output_type {
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
// Cranelift doesn't have bitcode
// user_wants_bitcode = true;
// // Copy to .bc, but always keep the .0.bc. There is a later
Expand Down
8 changes: 7 additions & 1 deletion compiler/rustc_codegen_llvm/src/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::ffi::{CStr, CString};
use std::fs::File;
use std::mem::ManuallyDrop;
use std::path::Path;
use std::ptr::NonNull;
use std::sync::Arc;
use std::{io, iter, slice};

Expand Down Expand Up @@ -604,7 +605,7 @@ pub(crate) fn run_pass_manager(
debug!("running the pass manager");
let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
unsafe { write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage) }?;
debug!("lto done");
Ok(())
}
Expand Down Expand Up @@ -663,6 +664,11 @@ impl ThinBuffer {
ThinBuffer(buffer)
}
}

/// Wraps a raw `llvm::ThinLTOBuffer` pointer in a [`ThinBuffer`].
///
/// # Safety
///
/// `ptr` is turned into a mutable reference, so the caller must pass a pointer to a valid
/// `llvm::ThinLTOBuffer` that is not aliased elsewhere.
/// NOTE(review): presumably the returned `ThinBuffer` takes over ownership of the buffer;
/// confirm against the `ThinBuffer` definition and its `Drop` impl.
///
/// # Panics
///
/// Panics if `ptr` is null.
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
    let mut non_null = NonNull::new(ptr).unwrap();
    // SAFETY: nullness was ruled out just above; validity of the pointee is the caller's
    // obligation per this function's contract.
    let buffer = unsafe { non_null.as_mut() };
    ThinBuffer(buffer)
}
}

impl ThinBufferMethods for ThinBuffer {
Expand Down
148 changes: 105 additions & 43 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::ffi::{CStr, CString};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::ptr::null_mut;
use std::sync::Arc;
use std::{fs, slice, str};

Expand All @@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
TargetMachineFactoryFn,
};
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
Expand All @@ -41,7 +42,7 @@ use crate::errors::{
WithLlvmError, WriteBytecode,
};
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
use crate::llvm::{self, DiagnosticInfo, PassManager};
use crate::llvm::{self, DiagnosticInfo, PassManager, ThinLTOBuffer};
use crate::type_::Type;
use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};

Expand Down Expand Up @@ -516,13 +517,24 @@ pub(crate) unsafe fn llvm_optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>,
thin_lto_buffer: Option<&mut *mut ThinLTOBuffer>,
config: &ModuleConfig,
opt_level: config::OptLevel,
opt_stage: llvm::OptStage,
) -> Result<(), FatalError> {
if thin_lto_buffer.is_some() {
assert!(
matches!(
opt_stage,
llvm::OptStage::PreLinkNoLTO
| llvm::OptStage::PreLinkFatLTO
| llvm::OptStage::PreLinkThinLTO
),
"the bitcode for LTO can only be obtained at the pre-link stage"
);
}
let unroll_loops =
opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
Expand Down Expand Up @@ -582,7 +594,9 @@ pub(crate) unsafe fn llvm_optimize(
config.no_prepopulate_passes,
config.verify_llvm_ir,
config.lint_llvm_ir,
using_thin_buffers,
thin_lto_buffer,
config.emit_thin_lto,
config.emit_thin_lto_summary,
config.merge_functions,
unroll_loops,
config.vectorize_slp,
Expand Down Expand Up @@ -637,7 +651,52 @@ pub(crate) unsafe fn optimize(
_ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
_ => llvm::OptStage::PreLinkNoLTO,
};
return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
// The embedded bitcode is used to run LTO/ThinLTO.
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
// this point.
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
|| config.emit_thin_lto_summary
{
Some(null_mut())
} else {
None
};
unsafe {
llvm_optimize(cgcx, dcx, module, thin_lto_buffer.as_mut(), config, opt_level, opt_stage)
}?;
if let Some(thin_lto_buffer) = thin_lto_buffer {
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
let thin_bc_out =
if let Some(incr_comp_session_dir) = cgcx.incr_comp_session_dir.as_ref() {
incr_comp_session_dir.join(pre_lto_embed_bitcode_filename(module_name.unwrap()))
} else {
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name)
};
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
}
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin_lto_buffer.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
}
}
Ok(())
}
Expand Down Expand Up @@ -716,61 +775,60 @@ pub(crate) unsafe fn codegen(
// requested.
// - If we don't have the integrated assembler then we need to emit
// asm from LLVM and use `gcc` to create the object file.

let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);

if config.bitcode_needed() {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
let data = thin.data();

if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}

if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);

let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}

// If the object file of the target spec is bitcode, what happens when performing LTO in Rust?
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
let thin = {
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_make_bitcode",
&*module.name,
);
ThinBuffer::new(llmod, config.emit_thin_lto, false)
};
let data = thin.data();
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if let Err(err) = fs::write(&bc_out, data) {
dcx.emit_err(WriteBytecode { path: &bc_out, err });
}
}

if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
&& module.kind == ModuleKind::Regular
{
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
let thin_bc_out = if let Some(incr_comp_session_dir) =
cgcx.incr_comp_session_dir.as_ref()
{
incr_comp_session_dir.join(pre_lto_embed_bitcode_filename(module_name.unwrap()))
} else {
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name)
};
assert!(
thin_bc_out.exists(),
"cannot find {} as embedded bitcode",
thin_bc_out.display()
);
let data = fs::read(&thin_bc_out).unwrap();
if cgcx.incr_comp_session_dir.is_none() {
debug!("removing embed bitcode file {:?}", thin_bc_out);
ensure_removed(dcx, &thin_bc_out);
}
unsafe {
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
}
}
}
Expand Down Expand Up @@ -1136,3 +1194,7 @@ fn record_llvm_cgu_instructions_stats(prof: &SelfProfilerRef, llmod: &llvm::Modu
serde_json::from_str(&raw_stats).expect("cannot parse llvm cgu instructions stats");
prof.artifact_size("cgu_instructions", module, total);
}

/// Builds the file name `<module_name>.<ext>`, where `<ext>` is whatever
/// `OutputType::ThinBitcode.extension()` returns.
///
/// In this file the result is joined onto the incremental-compilation session directory to
/// locate the pre-LTO bitcode that is later embedded into the object file.
fn pre_lto_embed_bitcode_filename(module_name: &str) -> String {
    let extension = OutputType::ThinBitcode.extension();
    format!("{module_name}.{extension}")
}
4 changes: 3 additions & 1 deletion compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2251,7 +2251,9 @@ unsafe extern "C" {
NoPrepopulatePasses: bool,
VerifyIR: bool,
LintIR: bool,
UseThinLTOBuffers: bool,
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
EmitThinLTO: bool,
EmitThinLTOSummary: bool,
MergeFunctions: bool,
UnrollLoops: bool,
SLPVectorize: bool,
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_codegen_ssa/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,9 @@ fn produce_final_output_artifacts(
// them for making an rlib.
copy_if_one_unit(OutputType::Bitcode, true);
}
OutputType::ThinBitcode => {
copy_if_one_unit(OutputType::ThinBitcode, true);
}
OutputType::ThinLinkBitcode => {
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
}
Expand Down
Loading

0 comments on commit 99acc6a

Please sign in to comment.