Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reland: Improve performance of global code by emitting fewer atomic barriers. #47636

Merged
merged 3 commits into from
Apr 19, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7760,11 +7760,19 @@ static jl_llvm_functions_t

Instruction &prologue_end = ctx.builder.GetInsertBlock()->back();

// step 11a. Emit the entry safepoint
// step 11a. For top-level code, load the world age
if (toplevel && !ctx.is_opaque_closure) {
LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
world->setOrdering(AtomicOrdering::Acquire);
ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr);
}

// step 11b. Emit the entry safepoint
if (JL_FEAT_TEST(ctx, safepoint_on_entry))
emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);

// step 11b. Do codegen in control flow order
// step 11c. Do codegen in control flow order
std::vector<int> workstack;
std::map<int, BasicBlock*> BB;
std::map<size_t, BasicBlock*> come_from_bb;
Expand Down Expand Up @@ -8087,13 +8095,6 @@ static jl_llvm_functions_t
ctx.builder.SetInsertPoint(tryblk);
}
else {
if (!jl_is_method(ctx.linfo->def.method) && !ctx.is_opaque_closure) {
// TODO: inference is invalid if this has any effect (which it often does)
LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
world->setOrdering(AtomicOrdering::Acquire);
ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr);
}
emit_stmtpos(ctx, stmt, cursor);
mallocVisitStmt(debuginfoloc, nullptr);
}
Expand Down Expand Up @@ -8319,12 +8320,12 @@ static jl_llvm_functions_t
}

// step 12. Perform any delayed instantiations
if (ctx.debug_enabled) {
bool in_prologue = true;
for (auto &BB : *ctx.f) {
for (auto &I : BB) {
CallBase *call = dyn_cast<CallBase>(&I);
if (call && !I.getDebugLoc()) {
bool in_prologue = true;
for (auto &BB : *ctx.f) {
for (auto &I : BB) {
CallBase *call = dyn_cast<CallBase>(&I);
if (call) {
if (ctx.debug_enabled && !I.getDebugLoc()) {
// LLVM Verifier: inlinable function call in a function with debug info must have a !dbg location
// make sure that anything we attempt to call has some inlining info, just in case optimization messed up
// (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram)
Expand All @@ -8333,12 +8334,24 @@ static jl_llvm_functions_t
I.setDebugLoc(topdebugloc);
}
}
if (&I == &prologue_end)
in_prologue = false;
if (toplevel && !ctx.is_opaque_closure && !in_prologue) {
// we're at toplevel; refresh the world age (acquire load + store) before every call instruction after the prologue
// TODO: inference is invalid if this has any effect (which it often does)
LoadInst *world = new LoadInst(ctx.types().T_size,
prepare_global_in(jl_Module, jlgetworld_global), Twine(),
/*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
world->setOrdering(AtomicOrdering::Acquire);
StoreInst *store_world = new StoreInst(world, world_age_field,
/*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
(void)store_world;
}
}
if (&I == &prologue_end)
in_prologue = false;
}
dbuilder.finalize();
}
if (ctx.debug_enabled)
dbuilder.finalize();

if (ctx.vaSlot > 0) {
// remove VA allocation if we never referenced it
Expand Down