Skip to content

Commit

Permalink
src: refactor embedded entrypoint loading
Browse files Browse the repository at this point in the history
This patch:

1. Refactor the routines used to compile and run an embedder
  entrypoint. In JS land special handling for SEA is done
  directly in main/embedding.js instead of clobbering the CJS
  loader. Add warnings to remind users that currently the
  require() in SEA bundled scripts only supports loading builtins.
2. Don't use the bundled SEA code cache when compiling CJS
  loaded from disk, since in that case we are certainly not
  compiling the code bundled into the SEA. Use a is_sea_main
  flag in CompileFunctionForCJSLoader() (which replaces an unused
  argument) to pass this into the C++ land - the code cache is
  still read directly from C++ to avoid the overhead of
  ArrayBuffer creation.
3. Move SEA loading code into
  MaybeLoadSingleExecutableApplication() which calls
  LoadEnvironment() with its own StartExecutionCallback().
  This avoids more hidden switches in StartExecution() and
  make them explicit. Also add some TODOs about how to support
  ESM in embedded applications.
4. Add more comments

PR-URL: #53573
Reviewed-By: Geoffrey Booth <webadmin@geoffreybooth.com>
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
joyeecheung authored Jul 5, 2024
1 parent b32c722 commit 10099bb
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 134 deletions.
2 changes: 1 addition & 1 deletion lib/internal/main/check_syntax.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ async function checkSyntax(source, filename) {
return;
}

wrapSafe(filename, source);
wrapSafe(filename, source, undefined, 'commonjs');
}
105 changes: 103 additions & 2 deletions lib/internal/main/embedding.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,116 @@
'use strict';

// This main script is currently only run when LoadEnvironment()
// is run with a non-null StartExecutionCallback or a UTF8
// main script. Effectively there are two cases where this happens:
// 1. It's a single-executable application *loading* a main script
// bundled into the executable. This is currently done from
// NodeMainInstance::Run().
// 2. It's an embedder application and LoadEnvironment() is invoked
// as described above.

const {
prepareMainThreadExecution,
} = require('internal/process/pre_execution');
const { isExperimentalSeaWarningNeeded } = internalBinding('sea');
const { isExperimentalSeaWarningNeeded, isSea } = internalBinding('sea');
const { emitExperimentalWarning } = require('internal/util');
const { embedderRequire, embedderRunCjs } = require('internal/util/embedding');
const { emitWarningSync } = require('internal/process/warning');
const { BuiltinModule: { normalizeRequirableId } } = require('internal/bootstrap/realm');
const { Module } = require('internal/modules/cjs/loader');
const { compileFunctionForCJSLoader } = internalBinding('contextify');
const { maybeCacheSourceMap } = require('internal/source_map/source_map_cache');

const { codes: {
ERR_UNKNOWN_BUILTIN_MODULE,
} } = require('internal/errors');

// Don't expand process.argv[1] because in a single-executable application or an
// embedder application, the user main script isn't necessarily provided via the
// command line (e.g. it could be provided via an API or bundled into the executable).
prepareMainThreadExecution(false, true);

const isLoadingSea = isSea();
if (isExperimentalSeaWarningNeeded()) {
emitExperimentalWarning('Single executable application');
}

// This is roughly the same as:
//
// const mod = new Module(filename);
// mod._compile(content, filename);
//
// but the code has been duplicated because currently there is no way to set the
// value of require.main to module.
//
// TODO(RaisinTen): Find a way to deduplicate this.
function embedderRunCjs(content) {
// The filename of the module (used for CJS module lookup)
// is always the same as the location of the executable itself
// at the time of the loading (which means it changes depending
// on where the executable is in the file system).
const filename = process.execPath;
const customModule = new Module(filename, null);

const {
function: compiledWrapper,
cachedDataRejected,
sourceMapURL,
} = compileFunctionForCJSLoader(
content,
filename,
isLoadingSea, // is_sea_main
false, // should_detect_module, ESM should be supported differently for embedded code
);
// Cache the source map for the module if present.
if (sourceMapURL) {
maybeCacheSourceMap(
filename,
content,
customModule,
false, // isGeneratedSource
undefined, // sourceURL, TODO(joyeecheung): should be extracted by V8
sourceMapURL,
);
}

// cachedDataRejected is only set if cache from SEA is used.
if (cachedDataRejected !== false && isLoadingSea) {
emitWarningSync('Code cache data rejected.');
}

// Patch the module to make it look almost like a regular CJS module
// instance.
customModule.filename = process.execPath;
customModule.paths = Module._nodeModulePaths(process.execPath);
embedderRequire.main = customModule;

return compiledWrapper(
customModule.exports, // exports
embedderRequire, // require
customModule, // module
process.execPath, // __filename
customModule.path, // __dirname
);
}

let warnedAboutBuiltins = false;

function embedderRequire(id) {
const normalizedId = normalizeRequirableId(id);
if (!normalizedId) {
if (isLoadingSea && !warnedAboutBuiltins) {
emitWarningSync(
'Currently the require() provided to the main script embedded into ' +
'single-executable applications only supports loading built-in modules.\n' +
'To load a module from disk after the single executable application is ' +
'launched, use require("module").createRequire().\n' +
'Support for bundled module loading or virtual file systems are under ' +
'discussions in https://github.com/nodejs/single-executable');
warnedAboutBuiltins = true;
}
throw new ERR_UNKNOWN_BUILTIN_MODULE(id);
}
return require(normalizedId);
}

return [process, embedderRequire, embedderRunCjs];
17 changes: 4 additions & 13 deletions lib/internal/modules/cjs/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -1343,11 +1343,10 @@ function loadESMFromCJS(mod, filename) {
* Wraps the given content in a script and runs it in a new context.
* @param {string} filename The name of the file being loaded
* @param {string} content The content of the file being loaded
* @param {Module} cjsModuleInstance The CommonJS loader instance
* @param {object} codeCache The SEA code cache
* @param {Module|undefined} cjsModuleInstance The CommonJS loader instance
* @param {'commonjs'|undefined} format Intended format of the module.
*/
function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) {
function wrapSafe(filename, content, cjsModuleInstance, format) {
assert(format !== 'module'); // ESM should be handled in loadESMFromCJS().
const hostDefinedOptionId = vm_dynamic_import_default_internal;
const importModuleDynamically = vm_dynamic_import_default_internal;
Expand Down Expand Up @@ -1378,16 +1377,8 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) {
};
}

const isMain = !!(cjsModuleInstance && cjsModuleInstance[kIsMainSymbol]);
const shouldDetectModule = (format !== 'commonjs' && getOptionValue('--experimental-detect-module'));
const result = compileFunctionForCJSLoader(content, filename, isMain, shouldDetectModule);

// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
const result = compileFunctionForCJSLoader(content, filename, false /* is_sea_main */, shouldDetectModule);

// Cache the source map for the module if present.
if (result.sourceMapURL) {
Expand All @@ -1409,7 +1400,7 @@ Module.prototype._compile = function(content, filename, format) {

let compiledWrapper;
if (format !== 'module') {
const result = wrapSafe(filename, content, this, undefined, format);
const result = wrapSafe(filename, content, this, format);
compiledWrapper = result.function;
if (result.canParseAsESM) {
format = 'module';
Expand Down
2 changes: 1 addition & 1 deletion lib/internal/modules/esm/translators.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ translators.set('module', function moduleStrategy(url, source, isMain) {
* @param {boolean} isMain - Whether the module is the entrypoint
*/
function loadCJSModule(module, source, url, filename, isMain) {
const compileResult = compileFunctionForCJSLoader(source, filename, isMain, false);
const compileResult = compileFunctionForCJSLoader(source, filename, false /* is_sea_main */, false);

const { function: compiledWrapper, sourceMapURL } = compileResult;
// Cache the source map for the cjs module if present.
Expand Down
53 changes: 0 additions & 53 deletions lib/internal/util/embedding.js

This file was deleted.

8 changes: 8 additions & 0 deletions src/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ std::optional<StartExecutionCallbackInfo> CallbackInfoFromArray(
CHECK(process_obj->IsObject());
CHECK(require_fn->IsFunction());
CHECK(runcjs_fn->IsFunction());
// TODO(joyeecheung): some support for running ESM as an entrypoint
// is needed. The simplest API would be to add a run_esm to
// StartExecutionCallbackInfo which compiles, links (to builtins)
// and evaluates a SourceTextModule.
// TODO(joyeecheung): the env pointer should be part of
// StartExecutionCallbackInfo, otherwise embedders are forced to use
// lambdas to pass it into the callback, which can make the code
// difficult to read.
node::StartExecutionCallbackInfo info{process_obj.As<Object>(),
require_fn.As<Function>(),
runcjs_fn.As<Function>()};
Expand Down
60 changes: 35 additions & 25 deletions src/node_contextify.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1453,12 +1453,17 @@ static std::vector<std::string_view> throws_only_in_cjs_error_messages = {
"await is only valid in async functions and "
"the top level bodies of modules"};

static MaybeLocal<Function> CompileFunctionForCJSLoader(Environment* env,
Local<Context> context,
Local<String> code,
Local<String> filename,
bool* cache_rejected,
bool is_cjs_scope) {
// If cached_data is provided, it would be used for the compilation and
// the on-disk compilation cache from NODE_COMPILE_CACHE (if configured)
// would be ignored.
static MaybeLocal<Function> CompileFunctionForCJSLoader(
Environment* env,
Local<Context> context,
Local<String> code,
Local<String> filename,
bool* cache_rejected,
bool is_cjs_scope,
ScriptCompiler::CachedData* cached_data) {
Isolate* isolate = context->GetIsolate();
EscapableHandleScope scope(isolate);

Expand All @@ -1475,25 +1480,9 @@ static MaybeLocal<Function> CompileFunctionForCJSLoader(Environment* env,
false, // is WASM
false, // is ES Module
hdo);
ScriptCompiler::CachedData* cached_data = nullptr;

bool used_cache_from_sea = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
if (sea.use_code_cache()) {
std::string_view data = sea.code_cache.value();
cached_data = new ScriptCompiler::CachedData(
reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int>(data.size()),
v8::ScriptCompiler::CachedData::BufferNotOwned);
used_cache_from_sea = true;
}
}
#endif

CompileCacheEntry* cache_entry = nullptr;
if (!used_cache_from_sea && env->use_compile_cache()) {
if (cached_data == nullptr && env->use_compile_cache()) {
cache_entry = env->compile_cache_handler()->GetOrInsert(
code, filename, CachedCodeType::kCommonJS);
}
Expand Down Expand Up @@ -1559,6 +1548,7 @@ static void CompileFunctionForCJSLoader(
CHECK(args[3]->IsBoolean());
Local<String> code = args[0].As<String>();
Local<String> filename = args[1].As<String>();
bool is_sea_main = args[2].As<Boolean>()->Value();
bool should_detect_module = args[3].As<Boolean>()->Value();

Isolate* isolate = args.GetIsolate();
Expand All @@ -1571,11 +1561,31 @@ static void CompileFunctionForCJSLoader(
Local<Value> cjs_exception;
Local<Message> cjs_message;

ScriptCompiler::CachedData* cached_data = nullptr;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (is_sea_main) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
// Use the "main" field in SEA config for the filename.
Local<Value> filename_from_sea;
if (!ToV8Value(context, sea.code_path).ToLocal(&filename_from_sea)) {
return;
}
filename = filename_from_sea.As<String>();
if (sea.use_code_cache()) {
std::string_view data = sea.code_cache.value();
cached_data = new ScriptCompiler::CachedData(
reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int>(data.size()),
v8::ScriptCompiler::CachedData::BufferNotOwned);
}
}
#endif

{
ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env());
TryCatchScope try_catch(env);
if (!CompileFunctionForCJSLoader(
env, context, code, filename, &cache_rejected, true)
env, context, code, filename, &cache_rejected, true, cached_data)
.ToLocal(&fn)) {
CHECK(try_catch.HasCaught());
CHECK(!try_catch.HasTerminated());
Expand Down Expand Up @@ -1730,7 +1740,7 @@ static void ContainsModuleSyntax(const FunctionCallbackInfo<Value>& args) {
TryCatchScope try_catch(env);
ShouldNotAbortOnUncaughtScope no_abort_scope(env);
if (CompileFunctionForCJSLoader(
env, context, code, filename, &cache_rejected, cjs_var)
env, context, code, filename, &cache_rejected, cjs_var, nullptr)
.ToLocal(&fn)) {
args.GetReturnValue().Set(false);
return;
Expand Down
15 changes: 1 addition & 14 deletions src/node_main_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,20 +103,7 @@ ExitCode NodeMainInstance::Run() {

void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) {
if (*exit_code == ExitCode::kNoFailure) {
bool runs_sea_code = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
if (!sea.use_snapshot()) {
runs_sea_code = true;
std::string_view code = sea.main_code_or_snapshot;
LoadEnvironment(env, code);
}
}
#endif
// Either there is already a snapshot main function from SEA, or it's not
// a SEA at all.
if (!runs_sea_code) {
if (!sea::MaybeLoadSingleExecutableApplication(env)) {
LoadEnvironment(env, StartExecutionCallback{});
}

Expand Down
Loading

0 comments on commit 10099bb

Please sign in to comment.