From c2d1343f611ce3ac494a98a0c3dd1606c1b7e91d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 1 Apr 2021 13:17:14 -0400 Subject: [PATCH] Faster incremental sysimg rebuilds This commit provides the ability to rebuild system images much faster. The key observation is that most of the time in a sysimage build is spent in LLVM generating native code (serializing Julia's data structures is quite fast). Thus, if we can re-use the code already generated for the system image we're currently running, we'll save a fair amount of time. Unfortunately, this is not 100% straightforward, since we were assuming that no linking happens in a number of places. This PR hacks around that, but it is not a particularly satisfying long-term solution. That said, it should work fine, and I think it's worth doing, so that we can explore the workflow adjustments that would rely on this. With that said, here's how to use this (at the low level; of course, PackageCompiler.jl would just handle this): ```shell $ mkdir chained $ time ./usr/bin/julia --sysimage-native-code=chained --sysimage=usr/lib/julia/sys.so --output-o chained/chained.o.a -e 'Base.__init_build();' real 0m9.633s user 0m8.613s sys 0m1.020s $ cp ../usr/lib/julia/sys-o.a . # Get the -o.a from the old sysimage $ ar x sys-o.a # Extract it into text.o and data.o $ rm data.o # rm the serialized sysimg data $ mv text.o text-old.o $ llvm-objcopy --remove-section .data.jl.sysimg_link text-old.o # rm the link between the native code and the old sysimg data $ ar x chained.o.a # Extract new sysimage files $ gcc -shared -o chained.so text.o data.o text-old.o # Link everything $ ../julia --sysimage=chained.so ``` As can be seen, regenerating the system image took about 9s (the subsequent commands aren't timed here, but take less than a second total). 
This compares very favorably with a non-chained sysimg rebuild: ``` time ./usr/bin/julia --sysimage=usr/lib/julia/sys.so --output-o nonchained.o.a -e 'Base.__init_build();' real 2m42.667s user 2m39.211s sys 0m3.452s ``` Of course, if you do load additional packages, the extra code does still need to be compiled, so e.g. building a system image for `Plots` goes from 3 mins to 1 min (building all of Plots, plus everything in Base that got invalidated). That is still all in LLVM, though - it should be relatively straightforward to multithread that after this PR (since linking the sysimg in multiple pieces is allowed). That part is not implemented yet, though. --- base/Base.jl | 8 +++ src/aotcompile.cpp | 95 +++++++++++++++++++++++++++++++----- src/ccall.cpp | 4 +- src/cgutils.cpp | 29 ++++++++--- src/codegen.cpp | 8 +-- src/debuginfo.cpp | 2 +- src/jitlayers.cpp | 45 ++++++++++++++++- src/jitlayers.h | 5 +- src/jloptions.c | 4 +- src/julia.h | 1 + src/julia_internal.h | 9 ++++ src/llvm-multiversioning.cpp | 6 ++- src/llvm-ptls.cpp | 23 ++++++--- src/processor.cpp | 19 ++++---- src/staticdata.c | 36 ++++++++++---- 15 files changed, 236 insertions(+), 58 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 3b531738276100..9fe8ce163a0233 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -458,6 +458,14 @@ function __init__() nothing end +function __init_build() + reinit_stdio() + Sys.__init_build() + init_depot_path() + init_load_path() + init_active_project() +end + end end # baremodule Base diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 5dbdf7000f5e78..61a698b6efe5e9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -54,7 +54,6 @@ #include #include - using namespace llvm; // our passes @@ -181,10 +180,12 @@ static void emit_offset_table(Module &mod, const std::vector &vars addrs[i] = ConstantExpr::getBitCast(var, T_psize); } ArrayType *vars_type = ArrayType::get(T_psize, nvars); - new GlobalVariable(mod, vars_type, true, + GlobalVariable *GV = + 
new GlobalVariable(mod, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, addrs), name); + GV->setSection(JL_SYSIMG_LINK_SECTION); } static bool is_safe_char(unsigned char c) @@ -276,6 +277,32 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance *ci_out = codeinst; } +extern FunctionType *jl_func_sig; + +StringRef lookup_sysimage_fname(void *ptr, jl_code_instance_t *codeinst) +{ + if (ptr == (void*)&jl_fptr_args) { + return "jl_fptr_args"; + } else if (ptr == (void*)&jl_fptr_sparam) { + return "jl_fptr_sparam"; + } + return jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)ptr, codeinst); +} + +extern Type *T_pjlvalue; +static void add_gv(void *ctx, void *mod, jl_value_t **gv_slot) +{ + jl_codegen_params_t *params = (jl_codegen_params_t*)ctx; + Module *M = (Module *)mod; + + GlobalVariable* &lgv = params->globals[*gv_slot]; + assert(!lgv); + lgv = new GlobalVariable(*M, T_pjlvalue, + false, GlobalVariable::PrivateLinkage, + NULL, jl_ExecutionEngine->getGlobalAtAddress((uintptr_t)gv_slot)); + lgv->setExternallyInitialized(true); +} + // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing @@ -302,6 +329,10 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p imaging_mode = 1; std::unique_ptr clone(jl_create_llvm_module("text")); + if (jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED) { + jl_foreach_sysimg_gvar_slot(add_gv, (void*)&params, (void*)clone.get()); + } + // compile all methods for the current world and type-inference world size_t compile_for[] = { jl_typeinf_world, jl_world_counter }; for (int worlds = 0; worlds < 2; worlds++) { @@ -330,6 +361,18 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p // 
find and prepare the source code to compile jl_code_instance_t *codeinst = NULL; jl_ci_cache_lookup(cgparams, mi, params.world, &codeinst, &src); + jl_llvm_functions_t fnames = { + lookup_sysimage_fname((void*)codeinst->invoke, codeinst).str(), + lookup_sysimage_fname(codeinst->specptr.fptr, codeinst).str(), + }; + if (fnames.functionObject.rfind("jsys", 0) != 0 && + fnames.specFunctionObject.rfind("jsys", 0) != 0) { + // Skip things already in the sysimage, we'll pick it up + // from there. + std::unique_ptr no_module(nullptr); + emitted[codeinst] = std::make_tuple(std::move(no_module), fnames); + continue; + } if (src && !emitted.count(codeinst)) { // now add it to our compilation results JL_GC_PROMISE_ROOTED(codeinst->rettype); @@ -356,11 +399,25 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p // clones the contents of the module `m` to the shadow_output collector // while examining and recording what kind of function pointer we have for (auto &def : emitted) { - jl_merge_module(clone.get(), std::move(std::get<0>(def.second))); jl_code_instance_t *this_code = def.first; jl_llvm_functions_t decls = std::get<1>(def.second); StringRef func = decls.functionObject; StringRef cfunc = decls.specFunctionObject; + if (std::get<0>(def.second)) + jl_merge_module(clone.get(), std::move(std::get<0>(def.second))); + else { + // TODO: Probably wait until all other modules were merged + // TODO: These signatures aren't actually right, but it's not worth + // trying to compute signatures for these. Maybe declare them as + // void* global variables instead and have jl_merge_module know + // how to merge them if it comes to it? 
+ Function::Create(jl_func_sig, + GlobalVariable::ExternalLinkage, + func, clone.get()); + Function::Create(jl_func_sig, + GlobalVariable::ExternalLinkage, + cfunc, clone.get()); + } uint32_t func_id = 0; uint32_t cfunc_id = 0; if (func == "jl_fptr_args") { @@ -389,8 +446,10 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p // and set them to be internalized and initialized at startup for (auto &global : gvars) { GlobalVariable *G = cast(clone->getNamedValue(global)); - G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); - G->setLinkage(GlobalVariable::InternalLinkage); + if (!G->isExternallyInitialized()) + G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); + G->setLinkage(GlobalVariable::ExternalLinkage); + G->setVisibility(GlobalVariable::HiddenVisibility); data->jl_sysimg_gvars.push_back(G); } @@ -409,8 +468,12 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p if (policy == CompilationPolicy::Default) { for (GlobalObject &G : clone->global_objects()) { if (!G.isDeclaration()) { - G.setLinkage(Function::InternalLinkage); - makeSafeName(G); + if (G.getLinkage() != GlobalVariable::InternalLinkage) { + G.setLinkage(Function::ExternalLinkage); + G.setVisibility(GlobalVariable::HiddenVisibility); + } + if (isa(&G) && !cast(&G)->isExternallyInitialized()) + makeSafeName(G); addComdat(&G); #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) // Add unwind exception personalities to functions to handle async exceptions @@ -515,10 +578,13 @@ void jl_dump_native(void *native_code, std::vector unopt_bc_Archive; std::vector outputs; + bool sysimg_chained = jl_options.use_sysimage_native_code == + JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; + if (unopt_bc_fname) PM.add(createBitcodeWriterPass(unopt_bc_OS)); if (bc_fname || obj_fname || asm_fname) { - addOptimizationPasses(&PM, jl_options.opt_level, true, true); + addOptimizationPasses(&PM, jl_options.opt_level, true, true, 
sysimg_chained); addMachinePasses(&PM, TM.get()); } if (bc_fname) @@ -550,12 +616,15 @@ void jl_dump_native(void *native_code, // reflect the address of the jl_RTLD_DEFAULT_handle variable // back to the caller, so that we can check for consistency issues GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(data->M.get()); - addComdat(new GlobalVariable(*data->M, + GlobalVariable *jlRTLD_DEFAULT_var_pointer = + new GlobalVariable(*data->M, jlRTLD_DEFAULT_var->getType(), true, GlobalVariable::ExternalLinkage, jlRTLD_DEFAULT_var, - "jl_RTLD_DEFAULT_handle_pointer")); + "jl_RTLD_DEFAULT_handle_pointer"); + jlRTLD_DEFAULT_var_pointer->setSection(JL_SYSIMG_LINK_SECTION); + addComdat(jlRTLD_DEFAULT_var_pointer); } // do the actual work @@ -627,7 +696,7 @@ void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM) // this defines the set of optimization passes defined for Julia at various optimization levels. // it assumes that the TLI and TTI wrapper passes have already been added. 
void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, - bool lower_intrinsics, bool dump_native) + bool lower_intrinsics, bool dump_native, bool chained) { #ifdef JL_DEBUG_BUILD PM->add(createGCInvariantVerifierPass(true)); @@ -660,7 +729,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, PM->add(createRemoveNIPass()); } PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (dump_native) + if (dump_native && !chained) PM->add(createMultiVersioningPass()); #if defined(JL_ASAN_ENABLED) PM->add(createAddressSanitizerFunctionPass()); @@ -695,7 +764,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, // consider AggressiveInstCombinePass at optlevel > 2 PM->add(createInstructionCombiningPass()); PM->add(createCFGSimplificationPass()); - if (dump_native) + if (dump_native && !chained) PM->add(createMultiVersioningPass()); PM->add(createSROAPass()); PM->add(createInstSimplifyLegacyPass()); diff --git a/src/ccall.cpp b/src/ccall.cpp index 66ab84c264f3ab..7466bbcefe44d9 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -12,7 +12,9 @@ extern const char jl_crtdll_basename[]; // somewhat unusual variable, in that aotcompile wants to get the address of this for a sanity check GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) { - return prepare_global_in(M, jlRTLD_DEFAULT_var); + GlobalVariable *var = prepare_global_in(M, jlRTLD_DEFAULT_var); + var->setSection(JL_SYSIMG_LINK_SECTION); + return var; } // Find or create the GVs for the library and symbol lookup. 
diff --git a/src/cgutils.cpp b/src/cgutils.cpp index bb0e15ed7363cf..cfd3c3590968e2 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -68,6 +68,7 @@ static Value *stringConstPtr( StringRef ctxt(txt.c_str(), txt.size() + 1); Constant *Data = ConstantDataArray::get(jl_LLVMContext, arrayRefFromStringRef(ctxt)); GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M); + gv->setLinkage(GlobalVariable::InternalLinkage); Value *zero = ConstantInt::get(Type::getInt32Ty(jl_LLVMContext), 0); Value *Args[] = { zero, zero }; return irbuilder.CreateInBoundsGEP(gv->getValueType(), gv, Args); @@ -206,16 +207,25 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) if (!gv) { raw_string_ostream(gvname) << cname << ctx.global_targets.size(); localname = StringRef(gvname); + gv = new GlobalVariable(*M, T_pjlvalue, + false, GlobalVariable::PrivateLinkage, + NULL, localname); } else { localname = gv->getName(); - if (gv->getParent() != M) + if (gv->getParent() != M) { + GlobalVariable *oldgv = gv; gv = cast_or_null(M->getNamedValue(localname)); + if (!gv) { + gv = new GlobalVariable(*M, T_pjlvalue, + false, oldgv->getLinkage(), + NULL, localname); + if (oldgv->isExternallyInitialized()) { + gv->setExternallyInitialized(true); + } + } + } } - if (gv == nullptr) - gv = new GlobalVariable(*M, T_pjlvalue, - false, GlobalVariable::PrivateLinkage, - NULL, localname); // LLVM passes sometimes strip metadata when moving load around // since the load at the new location satisfy the same condition as the origional one. 
// Mark the global as constant to LLVM code using our own metadata @@ -295,7 +305,7 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) return julia_pgv(ctx, "jl_sym#", addr, NULL, p); } // something else gets just a generic name - return julia_pgv(ctx, "jl_global#", p); + return julia_pgv(ctx, "jl_global#abc#", p); } static size_t dereferenceable_size(jl_value_t *jt) @@ -1524,8 +1534,11 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) Value *data = x.V; if (x.constant) { Constant *val = julia_const_to_llvm(ctx, x.constant); - if (val) - data = get_pointer_to_constant(ctx.emission_context, val, "_j_const", *jl_Module); + if (val) { + GlobalVariable *gv = get_pointer_to_constant(ctx.emission_context, val, "_j_const", *jl_Module); + gv->setLinkage(GlobalVariable::InternalLinkage); + data = gv; + } else data = literal_pointer_val(ctx, x.constant); } diff --git a/src/codegen.cpp b/src/codegen.cpp index f0edf7b19fa29f..25e458fd0524ff 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -165,13 +165,13 @@ static DataLayout &jl_data_layout = *(new DataLayout("")); // types static Type *T_jlvalue; -static Type *T_pjlvalue; +Type *T_pjlvalue; static Type *T_prjlvalue; static Type *T_ppjlvalue; static Type *T_pprjlvalue; static Type *jl_array_llvmt; static Type *jl_parray_llvmt; -static FunctionType *jl_func_sig; +FunctionType *jl_func_sig; static FunctionType *jl_func_sig_sparams; static Type *T_pvoidfunc; @@ -1294,7 +1294,9 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_ { Value *loc; if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr - loc = get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + GlobalVariable *gv = get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + gv->setLinkage(GlobalVariable::InternalLinkage); + loc = gv; } else { loc = emit_static_alloca(ctx, v->getType()); diff 
--git a/src/debuginfo.cpp b/src/debuginfo.cpp index b0243b2f3f4ff6..c58b512a0d1468 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -734,7 +734,7 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) std::move(SplitFile.get())); } -static uint64_t jl_sysimage_base; +uint64_t jl_sysimage_base; static jl_sysimg_fptrs_t sysimg_fptrs; static jl_method_instance_t **sysimg_fvars_linfo; static size_t sysimg_fvars_n; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 77ce3c93eadd34..4af5304a2cd6e5 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -33,6 +33,18 @@ RTDyldMemoryManager* createRTDyldMemoryManager(void); void jl_init_jit(void) { } +extern "C" +void jl_init_sysimage_chaining(void *sysimg_base, char *fname) +{ + auto errorobj = llvm::object::ObjectFile::createObjectFile(fname); + if (!errorobj) { + jl_error("Failed to load sysimg symbol table"); + } + + auto *theobj = errorobj->getBinary(); + jl_ExecutionEngine->addSysimgSymbolsByName(sysimg_base, theobj); +} + // Snooping on which functions are being compiled, and how long it takes JL_STREAM *dump_compiles_stream = NULL; extern "C" JL_DLLEXPORT @@ -757,9 +769,9 @@ void JuliaOJIT::addModule(std::unique_ptr M) SectionMemoryManager::getSymbolAddressInProcess( getMangledName(F->getName())))) { llvm::errs() << "FATAL ERROR: " - << "Symbol \"" << F->getName().str() << "\"" + << "Symbol \"" << F->getName().str() << "\" " << "not found"; - abort(); + abort(); } } } @@ -849,6 +861,35 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod return fname; } +StringRef JuliaOJIT::getGlobalAtAddress(uint64_t Addr) +{ + auto fname = ReverseLocalSymbolTable[(void*)(uintptr_t)Addr]; + assert(!fname.empty()); + return fname; +} + +void JuliaOJIT::addSysimgSymbolsByName(void *sysimg_base, llvm::object::ObjectFile *ofile) +{ + for (auto symbol : ofile->symbols()) { + if (symbol.getType().get() != llvm::object::SymbolRef::ST_Function && + symbol.getType().get() != 
llvm::object::SymbolRef::ST_Data) { + continue; + } + if (symbol.getFlags().get() & llvm::object::SymbolRef::SF_Undefined) { + continue; + } + void *Addr = (void*)((char*)sysimg_base + symbol.getAddress().get()); + auto &fname = ReverseLocalSymbolTable[Addr]; + if (fname.empty()) { + StringRef symname = symbol.getName().get(); + jl_sym_t *symsym = jl_symbol_n(symname.data(), symname.size()); + fname = StringRef(jl_symbol_name(symsym), symname.size()); + assert(!fname.empty()); + addGlobalMapping(fname, (uintptr_t)Addr); + } + } +} + void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) { diff --git a/src/jitlayers.h b/src/jitlayers.h index b517711185e454..8481c160da2b26 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "julia_assert.h" @@ -20,7 +21,7 @@ extern TargetMachine *jl_TargetMachine; extern bool imaging_mode; void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM); -void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false); +void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool chained=false); void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM); void jl_finalize_module(std::unique_ptr m); void jl_merge_module(Module *dest, std::unique_ptr src); @@ -196,6 +197,8 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(StringRef Name); uint64_t getFunctionAddress(StringRef Name); StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst); + StringRef getGlobalAtAddress(uint64_t Addr); + void addSysimgSymbolsByName(void *sysimg_base, llvm::object::ObjectFile *ofile); const DataLayout& getDataLayout() const; const Triple& getTargetTriple() const; size_t getTotalBytes() const; diff --git a/src/jloptions.c b/src/jloptions.c index cc5c85a06f0340..1b40e627d123df 100644 --- a/src/jloptions.c +++ 
b/src/jloptions.c @@ -390,7 +390,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_errorf("julia: invalid argument to --banner={yes|no|auto} (%s)", optarg); break; case opt_sysimage_native_code: - if (!strcmp(optarg,"yes")) + if (!strcmp(optarg,"chained")) + jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; + else if (!strcmp(optarg,"yes")) jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES; else if (!strcmp(optarg,"no")) jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO; diff --git a/src/julia.h b/src/julia.h index 3b28558fcd695c..4977fd0e0229cb 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2085,6 +2085,7 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_HANDLE_SIGNALS_ON 1 #define JL_OPTIONS_HANDLE_SIGNALS_OFF 0 +#define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED 2 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES 1 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO 0 diff --git a/src/julia_internal.h b/src/julia_internal.h index 96b01f153760f4..4bb8f2ff635ef1 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -674,6 +674,9 @@ void jl_init_debuginfo(void); void jl_init_thread_heap(jl_ptls_t ptls); void jl_init_int32_int64_cache(void); +void jl_init_sysimage_chaining(void *sysimg_base, char *fname); +void jl_foreach_sysimg_gvar_slot(void (*fptr)(void *, void *, jl_value_t **), void *ctx1, void *ctx2); + void jl_teardown_codegen(void); void _julia_init(JL_IMAGE_SEARCH rel); @@ -1380,6 +1383,12 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +#ifdef _OS_DARWIN_ +#define JL_SYSIMG_LINK_SECTION "__DATA,__jl_sysimg_link" +#else +#define JL_SYSIMG_LINK_SECTION ".data.jl.sysimg_link" +#endif + #ifdef __cplusplus } #endif diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 
68081eb53d3a5b..d2aff6ba24a258 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -884,10 +884,12 @@ Constant *CloneCtx::emit_offset_table(const std::vector &vars, StringRef nam for (uint32_t i = 1; i < nvars; i++) offsets[i + 1] = get_ptrdiff32(vars[i], vbase); ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); - add_comdat(new GlobalVariable(M, vars_type, true, + GlobalVariable *GV = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, offsets), - name + "_offsets")); + name + "_offsets"); + GV->setSection(JL_SYSIMG_LINK_SECTION); + add_comdat(GV); return vbase; } diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 6fbc40ceff0c4b..5c54dbbeba8fb4 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -140,14 +140,23 @@ Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) c GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const { - // Create a static global variable and points a global alias to it so that - // the address is visible externally but LLVM can still assume that the - // address of this variable doesn't need dynamic relocation - // (can be accessed with a single PC-rel load). - auto GV = new GlobalVariable(*M, T, false, GlobalVariable::InternalLinkage, +#ifndef _OS_DARWIN_ + // ELF linkers are picky about DSO-local references. Trick them by adding + // an extra global with the same address, but different linkage. This + // allows LLVM to use a PIC-rel reference, while still making the symbol + // available for dlsym. 
+ auto GV = new GlobalVariable(*M, T, false, GlobalVariable::WeakODRLinkage, Constant::getNullValue(T), name + ".real"); - add_comdat(GlobalAlias::create(T, 0, GlobalVariable::ExternalLinkage, + GV->setVisibility(GlobalVariable::HiddenVisibility); + GV->setDSOLocal(true); + add_comdat(GlobalAlias::create(T, 0, GlobalVariable::WeakODRLinkage, name, GV, M)); +#else + auto GV = new GlobalVariable(*M, T, false, GlobalVariable::CommonLinkage, + Constant::getNullValue(T), name); + GV->setVisibility(GlobalVariable::DefaultVisibility); + GV->setDSOLocal(true); +#endif return GV; } @@ -254,7 +263,7 @@ bool LowerPTLS::runOnModule(Module &_M) T_pint8 = T_int8->getPointerTo(); if (imaging_mode) { ptls_slot = create_aliased_global(T_ptls_getter, "jl_get_ptls_states_slot"); - ptls_offset = create_aliased_global(T_size, "jl_tls_offset"); + ptls_offset = create_aliased_global(T_size, "jl_sysimg_tls_offset"); } for (auto it = ptls_getter->user_begin(); it != ptls_getter->user_end();) { diff --git a/src/processor.cpp b/src/processor.cpp index e94129b22b5a37..1df50c250bf2d8 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -623,26 +623,27 @@ static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) { jl_sysimg_fptrs_t res = {nullptr, 0, nullptr, nullptr, 0, nullptr, nullptr}; - // .data base - char *data_base; - jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 0); + int32_t *offsets; + jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 0); - if (!data_base) { + if (!offsets) { // If multiversioning didn't run, just load the fvars directly. 
jl_dlsym(hdl, "jl_sysimg_fvars", (void**)&res.values, 1); return res; } + uint32_t nfunc = offsets[0]; + res.offsets = offsets + 1; + + // .data base + char *data_base; + jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 1); + // .text base char *text_base; jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 1); res.base = text_base; - int32_t *offsets; - jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 1); - uint32_t nfunc = offsets[0]; - res.offsets = offsets + 1; - void *ids; jl_dlsym(hdl, "jl_dispatch_target_ids", &ids, 1); uint32_t target_idx = callback(ids); diff --git a/src/staticdata.c b/src/staticdata.c index 822bfbb8316b1c..bbad91670d1c08 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -317,13 +317,21 @@ static void *jl_sysimg_handle = NULL; static uint64_t sysimage_base = 0; static uintptr_t *sysimg_gvars_base = NULL; static const int32_t *sysimg_gvars_offsets = NULL; +static uint64_t sysimg_gvars_max = 0; static jl_sysimg_fptrs_t sysimg_fptrs; -static inline uintptr_t *sysimg_gvars(uintptr_t *base, size_t idx) +static inline uintptr_t *sysimg_gvars(size_t idx) { if (!sysimg_gvars_offsets) - return ((uintptr_t **)base)[idx]; - return base + sysimg_gvars_offsets[idx] / sizeof(base[0]); + return ((uintptr_t **)sysimg_gvars_base)[idx]; + return sysimg_gvars_base + sysimg_gvars_offsets[idx] / sizeof(sysimg_gvars_base[0]); +} + +void jl_foreach_sysimg_gvar_slot(void (*fptr)(void *, void *, jl_value_t **), void *ctx1, void *ctx2) +{ + for (int i = 0; i < sysimg_gvars_max; ++i) { + fptr(ctx1, ctx2, (jl_value_t**)sysimg_gvars(i)); + } } JL_DLLEXPORT int jl_running_on_valgrind(void) @@ -334,11 +342,14 @@ JL_DLLEXPORT int jl_running_on_valgrind(void) static void jl_load_sysimg_so(void) { int imaging_mode = jl_generating_output() && !jl_options.incremental; + int sysimg_chained = jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; + const char *fname = NULL; // in --build mode only use sysimg data, not 
precompiled native code - if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 0); - if (sysimg_gvars_base) { - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1); + if (sysimg_chained || + (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES)) { + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 0); + if (sysimg_gvars_offsets) { + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1); sysimg_gvars_offsets += 1; assert(sysimg_fptrs.base); } else { @@ -349,7 +360,7 @@ static void jl_load_sysimg_so(void) jl_dlsym(jl_sysimg_handle, "jl_get_ptls_states_slot", (void **)&tls_getter_slot, 1); *tls_getter_slot = (uintptr_t)jl_get_ptls_states_getter(); size_t *tls_offset_idx; - jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_tls_offset", (void **)&tls_offset_idx, 1); *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 
0 : jl_tls_offset); #ifdef _OS_WINDOWS_ @@ -358,6 +369,7 @@ static void jl_load_sysimg_so(void) Dl_info dlinfo; if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { sysimage_base = (intptr_t)dlinfo.dli_fbase; + fname = dlinfo.dli_fname; } else { sysimage_base = 0; @@ -372,6 +384,10 @@ static void jl_load_sysimg_so(void) size_t *plen; jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); jl_restore_system_image_data(sysimg_data, *plen); + + if (jl_options.use_sysimage_native_code == JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED && fname) { + jl_init_sysimage_chaining((void*)sysimage_base, fname); + } } @@ -1293,7 +1309,6 @@ static void jl_update_all_fptrs(jl_serializer_state *s) { jl_sysimg_fptrs_t fvars = sysimg_fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later - sysimg_gvars_base = NULL; sysimg_fptrs.base = NULL; if (fvars.base == NULL && fvars.values == NULL) return; @@ -1359,10 +1374,11 @@ static void jl_update_all_gvars(jl_serializer_state *s) uint32_t offset = load_uint32(&gvars); if (offset) { uintptr_t v = get_item_for_reloc(s, base, size, offset); - *sysimg_gvars(sysimg_gvars_base, gvname_index) = v; + *sysimg_gvars(gvname_index) = v; } gvname_index += 1; } + sysimg_gvars_max = gvname_index; }