diff --git a/src/Makefile b/src/Makefile index eb4b5d1266df9d..7972840a9656ed 100644 --- a/src/Makefile +++ b/src/Makefile @@ -56,7 +56,7 @@ RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api llvm-remove-addrspaces \ - llvm-remove-ni + llvm-remove-ni llvm-julia-licm FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) LLVM_LIBS := all ifeq ($(USE_POLLY),1) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 9d90a691557013..1a6f27bd318c19 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -694,8 +694,10 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop PM->add(createLICMPass()); + PM->add(createJuliaLICMPass()); PM->add(createLoopUnswitchPass()); PM->add(createLICMPass()); + PM->add(createJuliaLICMPass()); // Subsequent passes not stripping metadata from terminator PM->add(createInstSimplifyLegacyPass()); PM->add(createIndVarSimplifyPass()); diff --git a/src/jitlayers.h b/src/jitlayers.h index 7305736f6de649..8dd45c1f939f52 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -238,6 +238,7 @@ Pass *createGCInvariantVerifierPass(bool Strong); Pass *createPropagateJuliaAddrspaces(); Pass *createRemoveJuliaAddrspacesPass(); Pass *createRemoveNIPass(); +Pass *createJuliaLICMPass(); Pass *createMultiVersioningPass(); Pass *createAllocOptPass(); // Whether the Function is an llvm or julia intrinsic. diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp new file mode 100644 index 00000000000000..c7ef9d0468e09a --- /dev/null +++ b/src/llvm-julia-licm.cpp @@ -0,0 +1,134 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "llvm-version.h" + +#include +#include +#include "llvm/Analysis/LoopIterator.h" +#include +#include +#include + +#include "llvm-pass-helpers.h" + +#define DEBUG_TYPE "julia-licm" + +using namespace llvm; + +/* + * Julia LICM pass. + * This takes care of some julia intrinsics that is safe to move around/out of loops but + * can't be handled by LLVM's LICM. These intrinsics can be moved outside of + * loop context as well but it is inside a loop where they matter the most. + */ + +namespace { + +struct JuliaLICMPass : public LoopPass, public JuliaPassContext { + static char ID; + JuliaLICMPass() : LoopPass(ID) {}; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override + { + // Get the preheader block to move instructions into, + // required to run this pass. + BasicBlock *preheader = L->getLoopPreheader(); + if (!preheader) + return false; + BasicBlock *header = L->getHeader(); + initFunctions(*header->getModule()); + // Also require `gc_preserve_begin_func` whereas + // `gc_preserve_end_func` is optional since the input to + // `gc_preserve_end_func` must be from `gc_preserve_begin_func`. + if (!gc_preserve_begin_func) + return false; + auto LI = &getAnalysis().getLoopInfo(); + auto DT = &getAnalysis().getDomTree(); + + // Lazy initialization of exit blocks insertion points. + bool exit_pts_init = false; + SmallVector _exit_pts; + auto get_exit_pts = [&] () -> ArrayRef { + if (!exit_pts_init) { + exit_pts_init = true; + SmallVector exit_bbs; + L->getUniqueExitBlocks(exit_bbs); + for (BasicBlock *bb: exit_bbs) { + _exit_pts.push_back(&*bb->getFirstInsertionPt()); + } + } + return _exit_pts; + }; + + bool changed = false; + // Scan in the right order so that we'll hoist the `begin` + // before we consider sinking `end`. + LoopBlocksRPO worklist(L); + worklist.perform(LI); + for (auto *bb : worklist) { + for (BasicBlock::iterator II = bb->begin(), E = bb->end(); II != E;) { + auto call = dyn_cast(&*II++); + if (!call) + continue; + auto callee = call->getCalledValue(); + assert(callee); + // It is always legal to extend the preserve period + // so we only need to make sure it is legal to move/clone + // the calls. + // If all the input arguments dominates the whole loop we can + // hoist the `begin` and if a `begin` dominates the loop the + // corresponding `end` can be moved to the loop exit. + if (callee == gc_preserve_begin_func) { + bool canhoist = true; + for (Use &U : call->arg_operands()) { + // Check if all arguments are generated outside the loop + auto origin = dyn_cast(U.get()); + if (!origin) + continue; + if (!DT->properlyDominates(origin->getParent(), header)) { + canhoist = false; + break; + } + } + if (!canhoist) + continue; + call->moveBefore(preheader->getTerminator()); + changed = true; + } + else if (callee == gc_preserve_end_func) { + auto begin = cast(call->getArgOperand(0)); + if (!DT->properlyDominates(begin->getParent(), header)) + continue; + changed = true; + auto exit_pts = get_exit_pts(); + if (exit_pts.empty()) { + call->eraseFromParent(); + continue; + } + call->moveBefore(exit_pts[0]); + for (unsigned i = 1; i < exit_pts.size(); i++) { + // Clone exit + CallInst::Create(call, {}, exit_pts[i]); + } + } + } + } + return changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override + { + getLoopAnalysisUsage(AU); + } +}; + +char JuliaLICMPass::ID = 0; +static RegisterPass + Y("JuliaLICM", "LICM for julia specific intrinsics.", + false, false); +} + +Pass *createJuliaLICMPass() +{ + return new JuliaLICMPass(); +}