diff --git a/base/Base.jl b/base/Base.jl index aa29a6d08c943..9e66954bcc135 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -500,6 +500,14 @@ function __init__() nothing end +function __init_build() + reinit_stdio() + Sys.__init_build() + init_depot_path() + init_load_path() + init_active_project() +end + # enable threads support @eval PCRE PCRE_COMPILE_LOCK = Threads.SpinLock() diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 13872b29322a3..9810e97ca523a 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -59,6 +59,7 @@ using namespace llvm; #include "julia_internal.h" #include "jitlayers.h" #include "julia_assert.h" +#include "codegen_shared.h" template // for GlobalObject's static T *addComdat(T *G) @@ -145,10 +146,11 @@ static void emit_offset_table(Module &mod, const std::vector &vars addrs[i] = ConstantExpr::getBitCast(var, T_psize); } ArrayType *vars_type = ArrayType::get(T_psize, nvars); - new GlobalVariable(mod, vars_type, true, + GlobalVariable *GV = new GlobalVariable(mod, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, addrs), name); + GV->setSection(JL_SYSIMG_LINK_SECTION); } static bool is_safe_char(unsigned char c) @@ -240,6 +242,32 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance *ci_out = codeinst; } +StringRef lookup_sysimage_fname(void *ptr, jl_code_instance_t *codeinst) +{ + if (ptr == (void*)&jl_fptr_args_addr) { + return "jl_fptr_args"; + } else if (ptr == (void*)&jl_fptr_sparam_addr) { + return "jl_fptr_sparam"; + } else if (ptr == (void*)&jl_fptr_const_return_addr) { + return "jl_fptr_const_return"; + } + return jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)ptr, codeinst, false); +} + +static void add_gv(void *ctx, void *mod, jl_value_t **gv_slot) +{ + jl_codegen_params_t *params = (jl_codegen_params_t*)ctx; + Module *M = (Module *)mod; + GlobalVariable* &lgv = params->globals[*gv_slot]; + if (!lgv){ + lgv = new GlobalVariable(*M, + 
+                                 JuliaType::get_pjlvalue_ty(M->getContext()),
+                                 false, GlobalVariable::PrivateLinkage,
+                                 NULL, jl_ExecutionEngine->getGlobalAtAddress((uintptr_t)gv_slot));
+        lgv->setExternallyInitialized(true);
+    }
+}
+
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup, and can
 // also be used be extern consumers like GPUCompiler.jl to obtain a module containing
@@ -275,6 +303,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     params.imaging = imaging;
+    bool sysimg_chained = jl_options.use_sysimage_native_code == JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED;
+    if (sysimg_chained) {
+        jl_foreach_sysimg_gvar_slot(add_gv, (void*)&params, (void*)clone.getModuleUnlocked());
+    }
+
     // compile all methods for the current world and type-inference world
     size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) };
     for (int worlds = 0; worlds < 2; worlds++) {
@@ -303,10 +336,33 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                 // find and prepare the source code to compile
                 jl_code_instance_t *codeinst = NULL;
                 jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src);
-                if (src && !emitted.count(codeinst)) {
+                // determines if the instance should be compiled; codeinst may still be NULL here, so its flags are only read when it is set
+                bool compile = true;
+                uint8_t precompile = codeinst ? jl_atomic_load_relaxed(&codeinst->precompile) : 0;
+                if (codeinst && (precompile & 2)) {
+                    // This condition (precompile & 2) provides a speed-up
+                    // Skip things already in the sysimage, we'll pick it up from there.
+ jl_llvm_functions_t fnames = { + lookup_sysimage_fname((void*)(codeinst->invoke.load()), codeinst).str(), + lookup_sysimage_fname(codeinst->specptr.fptr, codeinst).str(), + }; + if (!fnames.functionObject.empty() && !fnames.specFunctionObject.empty()){ + orc::ThreadSafeModule no_module; + if (emitted.find(codeinst) == emitted.end()){ + emitted[codeinst] = {std::move(no_module), std::move(fnames)}; + } + compile = false; + } + } + if (sysimg_chained && !(precompile & 4)){ + // not tagged for being compiled to sysimage + compile = false; + } + if (compile && src && !emitted.count(codeinst)) { // now add it to our compilation results JL_GC_PROMISE_ROOTED(codeinst->rettype); - orc::ThreadSafeModule result_m = jl_create_llvm_module(name_from_method_instance(codeinst->def), + orc::ThreadSafeModule result_m = jl_create_llvm_module( + name_from_method_instance(codeinst->def), params.tsctx, params.imaging, clone.getModuleUnlocked()->getDataLayout(), Triple(clone.getModuleUnlocked()->getTargetTriple())); @@ -333,11 +389,30 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // clones the contents of the module `m` to the shadow_output collector // while examining and recording what kind of function pointer we have for (auto &def : emitted) { - jl_merge_module(clone, std::move(std::get<0>(def.second))); jl_code_instance_t *this_code = def.first; jl_llvm_functions_t decls = std::get<1>(def.second); StringRef func = decls.functionObject; StringRef cfunc = decls.specFunctionObject; + if (std::get<0>(def.second)) + jl_merge_module(clone, std::move(std::get<0>(def.second))); + else { + // TODO: Probably wait until all other modules were merged + // TODO: These signatures aren't actually right, but it's not worth + // trying to compute signatures for these. Maybe declare them as + // void* global variables instead and have jl_merge_module know + // how to merge them if it comes to it? 
+ auto &context = clone.getModuleUnlocked()->getContext(); + FunctionType *jl_func_sig = JuliaType::get_jlfunc_ty(context); + + Function::Create(jl_func_sig, + GlobalVariable::ExternalLinkage, + func, clone.getModuleUnlocked()); + if (!cfunc.empty()) { + Function::Create(jl_func_sig, + GlobalVariable::ExternalLinkage, + cfunc, clone.getModuleUnlocked()); + } + } uint32_t func_id = 0; uint32_t cfunc_id = 0; if (func == "jl_fptr_args") { @@ -346,6 +421,9 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm else if (func == "jl_fptr_sparam") { func_id = -2; } + else if (func == "jl_fptr_const_return") { + func_id = -3; + } else { //Safe b/c context is locked by params data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); @@ -365,10 +443,12 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // now get references to the globals in the merged module // and set them to be internalized and initialized at startup for (auto &global : gvars) { - //Safe b/c context is locked by params - GlobalVariable *G = cast(clone.getModuleUnlocked()->getNamedValue(global)); - G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); - G->setLinkage(GlobalVariable::InternalLinkage); + auto gv = clone.getModuleUnlocked()->getNamedValue(global); + GlobalVariable *G = cast(gv); + if (!G->isExternallyInitialized()) + G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); + G->setLinkage(GlobalVariable::ExternalLinkage); + G->setVisibility(GlobalVariable::HiddenVisibility); data->jl_sysimg_gvars.push_back(G); } @@ -389,7 +469,10 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm //Safe b/c context is locked by params for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) { if (!G.isDeclaration()) { - G.setLinkage(Function::InternalLinkage); + if (G.getLinkage() != GlobalVariable::PrivateLinkage) { + 
G.setLinkage(Function::ExternalLinkage); + G.setVisibility(GlobalVariable::HiddenVisibility); + } makeSafeName(G); addComdat(&G); #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) @@ -496,6 +579,8 @@ void jl_dump_native_impl(void *native_code, std::vector unopt_bc_Archive; std::vector outputs; + bool sysimg_chained = jl_options.use_sysimage_native_code == JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; + legacy::PassManager preopt, postopt; if (unopt_bc_fname) @@ -515,7 +600,7 @@ void jl_dump_native_impl(void *native_code, legacy::PassManager optimizer; if (bc_fname || obj_fname || asm_fname) { addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(&optimizer, jl_options.opt_level, true, true); + addOptimizationPasses(&optimizer, jl_options.opt_level, true, true, false, sysimg_chained); addMachinePasses(&optimizer, jl_options.opt_level); } @@ -538,12 +623,15 @@ void jl_dump_native_impl(void *native_code, // reflect the address of the jl_RTLD_DEFAULT_handle variable // back to the caller, so that we can check for consistency issues GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(dataM); - addComdat(new GlobalVariable(*dataM, + GlobalVariable *jlRTLD_DEFAULT_var_pointer = + new GlobalVariable(*dataM, jlRTLD_DEFAULT_var->getType(), true, GlobalVariable::ExternalLinkage, jlRTLD_DEFAULT_var, - "jl_RTLD_DEFAULT_handle_pointer")); + "jl_RTLD_DEFAULT_handle_pointer"); + jlRTLD_DEFAULT_var_pointer->setSection(JL_SYSIMG_LINK_SECTION); + addComdat(jlRTLD_DEFAULT_var_pointer); } // do the actual work @@ -622,7 +710,7 @@ void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) // it assumes that the TLI and TTI wrapper passes have already been added. 
void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics, bool dump_native, - bool external_use) + bool external_use, bool chained) { // Note: LLVM 12 disabled the hoisting of common instruction // before loop vectorization (https://reviews.llvm.org/D84108). @@ -682,7 +770,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, PM->add(createRemoveNIPass()); } PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (dump_native) { + if (dump_native && !chained) { PM->add(createMultiVersioningPass(external_use)); PM->add(createCPUFeaturesPass()); // minimal clean-up to get rid of CPU feature checks @@ -724,7 +812,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, // consider AggressiveInstCombinePass at optlevel > 2 PM->add(createInstructionCombiningPass()); PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - if (dump_native) + if (dump_native && !chained) PM->add(createMultiVersioningPass(external_use)); PM->add(createCPUFeaturesPass()); PM->add(createSROAPass()); diff --git a/src/ccall.cpp b/src/ccall.cpp index 88c80b333b027..3f6204ac69f6a 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -48,7 +48,9 @@ STATISTIC(SRetCCalls, "Number of ccalls that were marked sret"); // somewhat unusual variable, in that aotcompile wants to get the address of this for a sanity check GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) { - return prepare_global_in(M, jlRTLD_DEFAULT_var); + GlobalVariable *var = prepare_global_in(M, jlRTLD_DEFAULT_var); + var->setSection(JL_SYSIMG_LINK_SECTION); + return var; } diff --git a/src/cgutils.cpp b/src/cgutils.cpp index db3807de988b2..f9fb52a692f78 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -119,6 +119,7 @@ static Value *stringConstPtr( StringRef ctxt(txt.c_str(), txt.size() + 1); Constant *Data = ConstantDataArray::get(irbuilder.getContext(), arrayRefFromStringRef(ctxt)); GlobalVariable *gv 
= get_pointer_to_constant(emission_context, Data, "_j_str", *M); + gv->setLinkage(GlobalVariable::PrivateLinkage); Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); Value *Args[] = { zero, zero }; return irbuilder.CreateInBoundsGEP(gv->getValueType(), gv, Args); @@ -301,16 +302,26 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) if (!gv) { raw_string_ostream(gvname) << cname << ctx.global_targets.size(); localname = StringRef(gvname); + gv = new GlobalVariable(*M, ctx.types().T_pjlvalue, + false, GlobalVariable::InternalLinkage, + NULL, localname); } else { localname = gv->getName(); - if (gv->getParent() != M) + if (gv->getParent() != M) { + GlobalVariable *oldgv = gv; gv = cast_or_null(M->getNamedValue(localname)); + if (!gv) { + gv = new GlobalVariable(*M, ctx.types().T_pjlvalue, + false, oldgv->getLinkage(), + NULL, localname); + if (oldgv->isExternallyInitialized()) { + gv->setExternallyInitialized(true); + } + } + } } - if (gv == nullptr) - gv = new GlobalVariable(*M, ctx.types().T_pjlvalue, - false, GlobalVariable::PrivateLinkage, - NULL, localname); + assert(gv != nullptr); // LLVM passes sometimes strip metadata when moving load around // since the load at the new location satisfy the same condition as the original one. 
// Mark the global as constant to LLVM code using our own metadata @@ -390,7 +401,7 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) return julia_pgv(ctx, "jl_sym#", addr, NULL, p); } // something else gets just a generic name - return julia_pgv(ctx, "jl_global#", p); + return julia_pgv(ctx, "jl_global#abc#", p); } static size_t dereferenceable_size(jl_value_t *jt) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1f209f36291a2..c87328ec29849 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -68,6 +68,10 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmctxt, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE +JL_DLLEXPORT void jl_init_sysimage_chaining_fallback(void *sysimg_base, const char *fname) +{ +} + JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { } diff --git a/src/codegen.cpp b/src/codegen.cpp index 8ac0cf6105601..f9744a600772b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1535,11 +1535,16 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G) static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, StringRef name, Module &M) { + bool sysimg_chained = jl_options.use_sysimage_native_code == JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; GlobalVariable *&gv = emission_context.mergedConstants[val]; StringRef localname; std::string ssno; if (gv == nullptr) { raw_string_ostream(ssno) << name << emission_context.mergedConstants.size(); + if (sysimg_chained){ + // modify the name for sysimage to prevent collisions + raw_string_ostream(ssno) << "chained"; + } localname = StringRef(ssno); } else { @@ -1663,7 +1668,9 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_ { Value *loc; if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr - loc = 
get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + GlobalVariable *gv = get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + gv->setLinkage(GlobalVariable::PrivateLinkage); + loc = gv; } else { loc = emit_static_alloca(ctx, v->getType()); diff --git a/src/gf.c b/src/gf.c index 1d36589a082f5..d24e95f583718 100644 --- a/src/gf.c +++ b/src/gf.c @@ -214,6 +214,13 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ return sf->func.value; } +uint8_t precompiles_for_sysimage = 0; + +JL_DLLEXPORT void jl_precompiles_for_sysimage(uint8_t enable) +{ + precompiles_for_sysimage = enable; +} + // ----- MethodInstance specialization instantiation ----- // JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( @@ -2136,7 +2143,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t else { record_precompile_statement(mi); } - jl_atomic_store_relaxed(&codeinst->precompile, 1); + uint8_t precompile = precompiles_for_sysimage ? 5 : 1; + jl_atomic_store_relaxed(&codeinst->precompile, precompile); return codeinst; } @@ -2264,7 +2272,8 @@ static void _generate_from_hint(jl_method_instance_t *mi, size_t world) if (codeinst != jl_nothing) { if (jl_atomic_load_relaxed(&((jl_code_instance_t*)codeinst)->invoke) == jl_fptr_const_return) return; // probably not a good idea to generate code - jl_atomic_store_relaxed(&((jl_code_instance_t*)codeinst)->precompile, 1); + uint8_t precompile = precompiles_for_sysimage ? 
5 : 1; + jl_atomic_store_relaxed(&((jl_code_instance_t*)codeinst)->precompile, precompile); } } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 50316c258d3a0..73c471e0f8c99 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -54,6 +54,18 @@ using namespace llvm; #define DEBUG_TYPE "jitlayers" +extern "C" JL_DLLEXPORT +void jl_init_sysimage_chaining_impl(void *sysimg_base, const char *fname) +{ + auto errorobj = llvm::object::ObjectFile::createObjectFile(fname); + if (!errorobj) { + jl_error("Failed to load sysimg symbol table"); + } + + auto *theobj = errorobj->getBinary(); + jl_ExecutionEngine->addSysimgSymbolsByName(sysimg_base, theobj); +} + // Snooping on which functions are being compiled, and how long it takes extern "C" JL_DLLEXPORT void jl_dump_compiles_impl(void *s) @@ -139,6 +151,9 @@ static jl_callptr_t _jl_compile_codeinst( StringMap NewGlobals; for (auto &global : params.globals) { NewGlobals[global.second->getName()] = global.first; + if (global.second->getName().empty()){ + jl_error("JIT Here is a problem - empty name ! 
(add_gv)");
+            }
         }
         for (auto &def : emitted) {
             orc::ThreadSafeModule &TSM = std::get<0>(def.second);
@@ -1140,7 +1155,7 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
                             SectionMemoryManager::getSymbolAddressInProcess(
                                 getMangledName(F->getName())))) {
                         llvm::errs() << "FATAL ERROR: "
-                                     << "Symbol \"" << F->getName().str() << "\""
+                                     << "Symbol \"" << F->getName().str() << "\" "
                                      << "not found";
                         abort();
                     }
@@ -1192,11 +1207,18 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
     return cantFail(addr.getAddress());
 }
-StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
+StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst, bool create)
 {
     std::lock_guard lock(RLST_mutex);
-    std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
-    if (fname->empty()) {
+    void *addres = (void*)(uintptr_t)Addr;
+    if (!create){ // do not create the local symbol
+        if(ReverseLocalSymbolTable.find(addres) != ReverseLocalSymbolTable.end()){
+            return ReverseLocalSymbolTable[addres];
+        }
+        return StringRef("");
+    }
+    std::string *fname = &ReverseLocalSymbolTable[addres];
+    if (fname->empty()) { // create the local symbol
         std::string string_fname;
         raw_string_ostream stream_fname(string_fname);
         // try to pick an appropriate name that describes it
@@ -1221,6 +1243,57 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod
     return *fname;
 }
+StringRef JuliaOJIT::getGlobalAtAddress(uint64_t Addr)
+{
+    // Returns the symbol name recorded in ReverseLocalSymbolTable for the global at `Addr`.
+    // find() is used instead of operator[] so that a miss does not insert an empty entry.
+    void *addr = (void*)(uintptr_t)Addr;
+    StringRef name;
+    {
+        // take the same lock getFunctionAtAddress holds around this table, and drop it
+        // before raising the error below
+        std::lock_guard<decltype(RLST_mutex)> lock(RLST_mutex);
+        auto it = ReverseLocalSymbolTable.find(addr);
+        if (it != ReverseLocalSymbolTable.end())
+            name = it->second;
+    }
+    if (name.empty())
+        jl_errorf("No symbol name registered for sysimage global at address %p", addr);
+    return name;
+}
+
+void JuliaOJIT::addSysimgSymbolsByName(void *sysimg_base, llvm::object::ObjectFile *ofile)
+{
+    // Records every defined function/data symbol of `ofile` in ReverseLocalSymbolTable,
+    // keyed by sysimg_base + the symbol's address, and registers it via addGlobalMapping.
+    for (auto symbol : ofile->symbols()) {
+        // The SymbolRef accessors return llvm::Expected; entries that fail to decode are
+        // skipped instead of being dereferenced unchecked.
+        auto type = llvm::expectedToOptional(symbol.getType());
+        if (!type || (*type != llvm::object::SymbolRef::ST_Function &&
+                      *type != llvm::object::SymbolRef::ST_Data)) {
+            continue;
+        }
+        auto flags = llvm::expectedToOptional(symbol.getFlags());
+        if (!flags || (*flags & llvm::object::SymbolRef::SF_Undefined)) {
+            continue;
+        }
+        auto offset = llvm::expectedToOptional(symbol.getAddress());
+        auto symname = llvm::expectedToOptional(symbol.getName());
+        if (!offset || !symname || symname->empty()) {
+            continue; // no usable address or name to register
+        }
+        void *Addr = (void*)((char*)sysimg_base + *offset);
+        std::string &fname = ReverseLocalSymbolTable[Addr];
+        if (fname.empty()) {
+            jl_sym_t *symsym = jl_symbol_n(symname->data(), symname->size());
+            fname = jl_symbol_name(symsym);
+            assert(!fname.empty());
+            addGlobalMapping(fname, (uintptr_t)Addr);
+        }
+    }
+}
+
 #ifdef JL_USE_JITLINK
 # if JL_LLVM_VERSION < 140000
diff --git a/src/jitlayers.h b/src/jitlayers.h
index ee3d0c14b3751..d06c38e52f023 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include "julia_assert.h"
@@ -58,7 +59,8 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeCont
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
 void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis);
-void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false);
+void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true,
+                           bool dump_native=false, bool external_use=false, bool chained=false);
 void addMachinePasses(legacy::PassManagerBase *PM, int optlevel);
 void jl_finalize_module(orc::ThreadSafeModule m);
 void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src);
@@ -376,7 +378,11 @@ class JuliaOJIT {
     JL_JITSymbol findUnmangledSymbol(StringRef Name);
     uint64_t getGlobalValueAddress(StringRef Name);
     uint64_t getFunctionAddress(StringRef Name);
-    StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst);
+    StringRef getFunctionAtAddress(uint64_t
Addr, jl_code_instance_t *codeinst, bool create = true); + + StringRef getGlobalAtAddress(uint64_t Addr); + void addSysimgSymbolsByName(void *sysimg_base, llvm::object::ObjectFile *ofile); + auto getContext() { return *ContextPool; } diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index f0cc94d22ba68..71b2491941b63 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -371,6 +371,7 @@ XX(jl_pointerref) \ XX(jl_pointerset) \ XX(jl_pop_handler) \ + XX(jl_precompiles_for_sysimage) \ XX(jl_preload_sysimg_so) \ XX(jl_prepend_cwd) \ XX(jl_printf) \ @@ -552,6 +553,7 @@ YY(jl_type_to_llvm) \ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ + YY(jl_init_sysimage_chaining) \ YY(jl_add_optimization_passes) \ YY(LLVMExtraAddLowerSimdLoopPass) \ YY(LLVMExtraAddFinalLowerGCPass) \ diff --git a/src/jloptions.c b/src/jloptions.c index ef5d192322c64..0bce9ff688c04 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -427,7 +427,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_errorf("julia: invalid argument to --banner={yes|no|auto} (%s)", optarg); break; case opt_sysimage_native_code: - if (!strcmp(optarg,"yes")) + if (!strcmp(optarg,"chained")) + jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED; + else if (!strcmp(optarg,"yes")) jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES; else if (!strcmp(optarg,"no")) jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO; diff --git a/src/julia.h b/src/julia.h index f8c39c7ab448b..8cd2e5a2bbfaf 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2144,6 +2144,7 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_HANDLE_SIGNALS_ON 1 #define JL_OPTIONS_HANDLE_SIGNALS_OFF 0 +#define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED 2 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES 1 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO 0 diff --git a/src/julia_internal.h b/src/julia_internal.h 
index 60583f2240aea..a726918e833c5 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -775,6 +775,9 @@ void jl_init_thread_heap(jl_ptls_t ptls); void jl_init_int32_int64_cache(void); JL_DLLEXPORT void jl_init_options(void); +void jl_init_sysimage_chaining(void *sysimg_base, const char *fname); +void JL_DLLEXPORT jl_foreach_sysimg_gvar_slot(void (*fptr)(void *, void *, jl_value_t **), void *ctx1, void *ctx2); + void jl_teardown_codegen(void); void jl_set_base_ctx(char *__stk); @@ -895,6 +898,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo( jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins); JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_method_instance_t *caller); JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller); +JL_DLLEXPORT void jl_precompiles_for_sysimage(uint8_t enamble); uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT; jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs); @@ -1546,6 +1550,12 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +#ifdef _OS_DARWIN_ +#define JL_SYSIMG_LINK_SECTION "__DATA,__jl_sysimg_link" +#else +#define JL_SYSIMG_LINK_SECTION ".data.jl.sysimg_link" +#endif + #ifdef __cplusplus } #endif diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 4badf555bcdbe..da1f641500fac 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -937,10 +937,12 @@ Constant *CloneCtx::emit_offset_table(const std::vector &vars, StringRef nam for (uint32_t i = 1; i < nvars; i++) offsets[i + 1] = get_ptrdiff32(vars[i], vbase); ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); - add_comdat(new GlobalVariable(M, vars_type, true, + GlobalVariable *GV = new GlobalVariable(M, 
vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, offsets), - name + "_offsets")); + name + "_offsets"); + GV->setSection(JL_SYSIMG_LINK_SECTION); + add_comdat(GV); return vbase; } diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index e948e1c1a10bc..92612010d5c2b 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -135,14 +135,23 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const { - // Create a static global variable and points a global alias to it so that - // the address is visible externally but LLVM can still assume that the - // address of this variable doesn't need dynamic relocation - // (can be accessed with a single PC-rel load). - auto GV = new GlobalVariable(*M, T, false, GlobalVariable::InternalLinkage, +#ifndef _OS_DARWIN_ + // ELF linkers are picky about DSO-local references. Trick them by adding + // an extra global with the same address, but different linkage. This + // allows LLVM to use a PIC-rel reference, while still making the symbol + // available for dlsym. 
+ auto GV = new GlobalVariable(*M, T, false, GlobalVariable::WeakODRLinkage, Constant::getNullValue(T), name + ".real"); - add_comdat(GlobalAlias::create(T, 0, GlobalVariable::ExternalLinkage, + GV->setVisibility(GlobalVariable::HiddenVisibility); + GV->setDSOLocal(true); + add_comdat(GlobalAlias::create(T, 0, GlobalVariable::WeakODRLinkage, name, GV, M)); +#else + auto GV = new GlobalVariable(*M, T, false, GlobalVariable::CommonLinkage, + Constant::getNullValue(T), name); + GV->setVisibility(GlobalVariable::DefaultVisibility); + GV->setDSOLocal(true); +#endif return GV; } @@ -267,7 +276,7 @@ bool LowerPTLS::runOnModule(Module &_M, bool *CFGModified) if (imaging_mode) { pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); pgcstack_key_slot = create_aliased_global(getSizeTy(_M.getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) - pgcstack_offset = create_aliased_global(getSizeTy(_M.getContext()), "jl_tls_offset"); + pgcstack_offset = create_aliased_global(getSizeTy(_M.getContext()), "jl_sysimg_tls_offset"); } for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { diff --git a/src/precompile.c b/src/precompile.c index 7713a312f2a4c..9a31978b667b5 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -341,7 +341,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) { do_compile = 1; } - else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { + else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || (jl_atomic_load_relaxed(&codeinst->precompile) & 1)) { do_compile = 1; } } diff --git a/src/processor.cpp b/src/processor.cpp index b9dfc2b7f0b4e..e6e60a100651f 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -623,21 +623,29 @@ static inline std::vector> &get_cmdline_targets(F &&feature_cb) template static inline jl_sysimg_fptrs_t 
parse_sysimg(void *hdl, F &&callback) { - jl_sysimg_fptrs_t res = {nullptr, 0, nullptr, 0, nullptr, nullptr}; + jl_sysimg_fptrs_t res = {nullptr, 0, nullptr, nullptr, 0, nullptr, nullptr}; + + int32_t *offsets; + jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 0); + + if (!offsets) { + // If multiversioning didn't run, just load the fvars directly. + jl_dlsym(hdl, "jl_sysimg_fvars", (void**)&res.values, 1); + return res; + } + + uint32_t nfunc = offsets[0]; + res.offsets = offsets + 1; // .data base char *data_base; jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 1); + // .text base char *text_base; jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 1); res.base = text_base; - int32_t *offsets; - jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 1); - uint32_t nfunc = offsets[0]; - res.offsets = offsets + 1; - void *ids; jl_dlsym(hdl, "jl_dispatch_target_ids", &ids, 1); uint32_t target_idx = callback(ids); diff --git a/src/processor.h b/src/processor.h index f3b571cf9b937..2f5a5f1c154fc 100644 --- a/src/processor.h +++ b/src/processor.h @@ -141,6 +141,9 @@ typedef struct _jl_sysimg_fptrs_t { // function pointer offsets const int32_t *offsets; + // If saved as values rather than offsets, look here + const uintptr_t *values; + // Following fields contains the information about the selected target. // All of these fields are 0 if the selected targets have all the functions cloned. // Instead the offsets are stored in `noffsets` and `offsets`. 
diff --git a/src/staticdata.c b/src/staticdata.c
index 8a3d4132c42f5..9678b604a6d0f 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -371,11 +371,21 @@ static void *jl_sysimg_handle = NULL;
 static uint64_t sysimage_base = 0;
 static uintptr_t *sysimg_gvars_base = NULL;
 static const int32_t *sysimg_gvars_offsets = NULL;
+static uint64_t sysimg_gvars_max = 0;
 static jl_sysimg_fptrs_t sysimg_fptrs;
-static inline uintptr_t *sysimg_gvars(uintptr_t *base, size_t idx)
+static inline uintptr_t *sysimg_gvars(size_t idx)
 {
-    return base + sysimg_gvars_offsets[idx] / sizeof(base[0]);
+    if (!sysimg_gvars_offsets) // no offset table: sysimg_gvars_base is indexed as an array of slot pointers
+        return ((uintptr_t **)sysimg_gvars_base)[idx];
+    return sysimg_gvars_base + sysimg_gvars_offsets[idx] / sizeof(sysimg_gvars_base[0]);
+}
+
+void jl_foreach_sysimg_gvar_slot(void (*fptr)(void *, void *, jl_value_t **), void *ctx1, void *ctx2)
+{
+    for (size_t i = 0; i < sysimg_gvars_max; ++i) { // unsigned index, matching sysimg_gvars(size_t) and the unsigned bound
+        fptr(ctx1, ctx2, (jl_value_t**)sysimg_gvars(i)); // invoke the callback once per gvar slot with both contexts
+    }
 }
 JL_DLLEXPORT int jl_running_on_valgrind(void)
@@ -386,12 +396,20 @@ JL_DLLEXPORT int jl_running_on_valgrind(void)
 static void jl_load_sysimg_so(void)
 {
     int imaging_mode = jl_generating_output() && !jl_options.incremental;
+    int sysimg_chained = jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED;
+    const char *fname = NULL;
     // in --build mode only use sysimg data, not precompiled native code
-    if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) {
-        jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1);
-        jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1);
-        sysimg_gvars_offsets += 1;
-        assert(sysimg_fptrs.base);
+    if (sysimg_chained ||
+        (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES)) {
+        jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 0);
+        if (sysimg_gvars_offsets) {
+
jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1); + sysimg_gvars_offsets += 1; + assert(sysimg_fptrs.base); + } else { + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars", (void **)&sysimg_gvars_base, 1); + assert(sysimg_fptrs.values); + } void *pgcstack_func_slot; jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1); @@ -400,7 +418,7 @@ static void jl_load_sysimg_so(void) jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); size_t *tls_offset_idx; - jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_tls_offset", (void **)&tls_offset_idx, 1); *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); #ifdef _OS_WINDOWS_ @@ -409,6 +427,7 @@ static void jl_load_sysimg_so(void) Dl_info dlinfo; if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { sysimage_base = (intptr_t)dlinfo.dli_fbase; + fname = dlinfo.dli_fname; } else { sysimage_base = 0; @@ -423,6 +442,10 @@ static void jl_load_sysimg_so(void) size_t *plen; jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); jl_restore_system_image_data(sysimg_data, *plen); + + if (jl_options.use_sysimage_native_code == JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_CHAINED && fname) { + jl_init_sysimage_chaining((void*)sysimage_base, fname); + } } @@ -1098,6 +1121,9 @@ static void jl_write_values(jl_serializer_state *s) else if (invokeptr_id == -2) { fptr_id = JL_API_WITH_PARAMETERS; } + else if (invokeptr_id == -3) { + fptr_id = JL_API_CONST; + } else { assert(invokeptr_id > 0); ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*)); @@ -1348,11 +1374,11 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas case FunctionRef: switch ((jl_callingconv_t)offset) { case JL_API_BOXED: - if (sysimg_fptrs.base) + if (sysimg_fptrs.base || sysimg_fptrs.values) return (uintptr_t)jl_fptr_args; JL_FALLTHROUGH; case 
JL_API_WITH_PARAMETERS: - if (sysimg_fptrs.base) + if (sysimg_fptrs.base || sysimg_fptrs.values) return (uintptr_t)jl_fptr_sparam; return (uintptr_t)NULL; case JL_API_CONST: @@ -1462,9 +1488,8 @@ static void jl_update_all_fptrs(jl_serializer_state *s) { jl_sysimg_fptrs_t fvars = sysimg_fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later - sysimg_gvars_base = NULL; sysimg_fptrs.base = NULL; - if (fvars.base == NULL) + if (fvars.base == NULL && fvars.values == NULL) return; int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); size_t i; @@ -1485,20 +1510,26 @@ static void jl_update_all_fptrs(jl_serializer_state *s) offset = ~offset; } jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset); + codeinst->precompile |= 2; uintptr_t base = (uintptr_t)fvars.base; assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return); assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL); linfos[i] = codeinst->def; // now it's a MethodInstance - int32_t offset = fvars.offsets[i]; - for (; clone_idx < fvars.nclones; clone_idx++) { - uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask; - if (idx < i) - continue; - if (idx == i) - offset = fvars.clone_offsets[clone_idx]; - break; + void *fptr = NULL; + if (fvars.offsets) { + int32_t offset = fvars.offsets[i]; + for (; clone_idx < fvars.nclones; clone_idx++) { + uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask; + if (idx < i) + continue; + if (idx == i) + offset = fvars.clone_offsets[clone_idx]; + break; + } + fptr = (void*)(base + offset); + } else { + fptr = (void*)fvars.values[i]; } - void *fptr = (void*)(base + offset); if (specfunc) { codeinst->specptr.fptr = fptr; codeinst->isspecsig = 1; // TODO: set only if confirmed to be true @@ -1527,10 +1558,11 @@ static void jl_update_all_gvars(jl_serializer_state *s) uint32_t offset = load_uint32(&gvars); if (offset) { uintptr_t v = get_item_for_reloc(s, base, 
size, offset); - *sysimg_gvars(sysimg_gvars_base, gvname_index) = v; + *sysimg_gvars(gvname_index) = v; } gvname_index += 1; } + sysimg_gvars_max = gvname_index; } diff --git a/test/chainedsysimage.jl b/test/chainedsysimage.jl new file mode 100644 index 0000000000000..37588fed30e62 --- /dev/null +++ b/test/chainedsysimage.jl @@ -0,0 +1,51 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test, Libdl + +@testset "empty chained sysimage" begin + binariesavailable = false + try + # TODO - consider using LLVM_full_jll to make this test multiplatform + success(`ar --version`) + success(`llvm-objcopy --version`) + success(`ld --version`) + binariesavailable = true + catch + end + + if !binariesavailable + @test_broken false # Not supported for this OS yet + else + sysimg = Base.unsafe_string(Base.JLOptions().image_file) + cd(mktempdir()) do + sysoa = replace(sysimg, ".$(Libdl.dlext)" => "-o.a") + cp("$sysoa", "sys-o.a", force=true) + @test success(`ar x sys-o.a`) + rm("data.o") + mv("text.o", "text-old.o") + @test success(`llvm-objcopy --remove-section .data.jl.sysimg_link text-old.o`) + + source_txt = """ +Base.__init_build(); +module PrecompileStagingArea; + using Printf +end; +@ccall jl_precompiles_for_sysimage(1::Cuchar)::Cvoid; +println(0.1, 1, 0x2) +""" + + @test success(`$(Base.julia_cmd()) --sysimage-native-code=chained --startup-file=no --sysimage=$sysimg --output-o chained.o.a -e $source_txt`) + @test success(`ar x chained.o.a`) # Extract new sysimage files + @test success(`ld -shared -o chained.$(Libdl.dlext) text.o data.o text-old.o`) + + # Test if "println(0.1, 1, 0x2)" is precompiled + source_txt2 = """ +a = @allocated println(0.1, 1, 0x2); +b = @allocated println(0.1, 1, 0x2); +@assert a == b; +""" + + @test success(`$(Base.julia_cmd()) --sysimage=chained.$(Libdl.dlext) -e $source_txt2`) + end + end +end \ No newline at end of file